Skip to content

Commit

Permalink
Removed cublas_handle from a comment and unnecessary headers
Browse files Browse the repository at this point in the history
  • Loading branch information
konradkusiak97 committed Nov 7, 2024
1 parent c8420f6 commit ee669b8
Showing 1 changed file with 46 additions and 49 deletions.
95 changes: 46 additions & 49 deletions src/blas/backends/cublas/cublas_scope_handle.hpp
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
/***************************************************************************
* Copyright (C) Codeplay Software Limited
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* For your convenience, a copy of the License has been included in this
* repository.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**************************************************************************/
* Copyright (C) Codeplay Software Limited
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* For your convenience, a copy of the License has been included in this
* repository.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**************************************************************************/
#ifndef _CUBLAS_SCOPED_HANDLE_HPP_
#define _CUBLAS_SCOPED_HANDLE_HPP_
#if __has_include(<sycl/sycl.hpp>)
Expand All @@ -24,12 +24,9 @@
#include <CL/sycl.hpp>
#endif

#include <atomic>
#include "cublas_helper.hpp"
#include <memory>
#include <thread>
#include <unordered_map>
#include "cublas_helper.hpp"
#include "cublas_handle.hpp"

namespace oneapi {
namespace mkl {
Expand All @@ -38,45 +35,45 @@ namespace cublas {

/**
* @brief NVIDIA advice for handle creation:
https://devtalk.nvidia.com/default/topic/838794/gpu-accelerated libraries/using-cublas-in-different-cuda-streams/
According to NVIDIA:
1) It is required that different handles be used for different devices:
http://docs.nvidia.com/cuda/cublas/index.html#cublas-context
2) It is recommended (but not required, if care is taken) that different handles be used for different host threads:
https://devtalk.nvidia.com/default/topic/838794/gpu-accelerated libraries/using-cublas-in-different-cuda-streams/
According to NVIDIA:
1) It is required that different handles be used for different devices:
http://docs.nvidia.com/cuda/cublas/index.html#cublas-context
2) It is recommended (but not required, if care is taken) that different
handles be used for different host threads:
http://docs.nvidia.com/cuda/cublas/index.html#thread-safety2changeme
3) It is neither required nor recommended that different handles be used for different streams on the same device,
using the same host thread.
3) It is neither required nor recommended that different handles be used
for different streams on the same device, using the same host thread.
**/

// Helper built around a sycl::interop_handle. It declares lookups for the
// native CUDA stream, the SYCL context and the cuBLAS handle that belong to a
// given sycl::queue, plus small inline utilities for native memory access and
// stream synchronisation.
// NOTE(review): every declaration below appears twice, differing only in the
// placement of `&` (`T& x` vs `T &x`) and in line wrapping. This looks like the
// before/after lines of a formatting change shown together; confirm which copy
// is meant to remain.
class CublasScopedContextHandler {
// Interop handle used by get_mem(); presumably bound from the constructor
// argument (constructor definition is not shown here).
sycl::interop_handle& ih;
// Native CUDA device; where it is assigned is not visible in this header.
CUdevice nativeDevice;
// `static thread_local`: one pointer per host thread, shared by all instances
// of this class on that thread.
static thread_local std::shared_ptr<cublasHandle_t> cublasHandle;
// Returns the CUstream associated with `queue` (definition elsewhere).
CUstream get_stream(const sycl::queue& queue);
// Returns the sycl::context associated with `queue` (definition elsewhere).
sycl::context get_context(const sycl::queue& queue);
sycl::interop_handle &ih;
CUdevice nativeDevice;
static thread_local std::shared_ptr<cublasHandle_t> cublasHandle;
CUstream get_stream(const sycl::queue &queue);
sycl::context get_context(const sycl::queue &queue);

public:
// Constructs the handler from the interop handle `ih`, which is taken by
// reference.
CublasScopedContextHandler(sycl::interop_handle& ih);
CublasScopedContextHandler(sycl::interop_handle &ih);

/**
* @brief get_handle: creates the handle by implicitly imposing the advice
* given by NVIDIA for creating a cublas_handle. (e.g. one cuStream per device
* per thread).
/**
* @brief get_handle: returns the handle assigned to the device from
* sycl::queue.
* @param queue sycl queue.
* @return cublasHandle_t a handle to construct cublas routines
*/
cublasHandle_t get_handle(const sycl::queue& queue);
// This is a work-around function for reinterpret_casting the memory. This
// will be fixed when SYCL-2020 has been implemented for Pi backend.
//
// Obtains the native CUDA memory for `acc` through the interop handle and
// reinterpret_casts the resulting CUdeviceptr to the requested type T.
// T: destination type of the cast (must be named explicitly by the caller).
// U: type of `acc`, the object passed to get_native_mem (deduced).
template <typename T, typename U>
inline T get_mem(U acc) {
CUdeviceptr cudaPtr = ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(acc);
return reinterpret_cast<T>(cudaPtr);
}
cublasHandle_t get_handle(const sycl::queue &queue);
// This is a work-around function for reinterpret_casting the memory. This
// will be fixed when SYCL-2020 has been implemented for Pi backend.
template <typename T, typename U> inline T get_mem(U acc) {
CUdeviceptr cudaPtr =
ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(acc);
return reinterpret_cast<T>(cudaPtr);
}

// Calls cuStreamSynchronize on the stream returned by get_stream(queue).
// The value returned by cuStreamSynchronize is discarded, so a failed
// synchronisation is not reported to the caller.
void wait_stream(const sycl::queue& queue) {
cuStreamSynchronize(get_stream(queue));
}
void wait_stream(const sycl::queue &queue) {
cuStreamSynchronize(get_stream(queue));
}
};

} // namespace cublas
Expand Down

0 comments on commit ee669b8

Please sign in to comment.