From 262ac8f770012a23aa58099816c430e2a6abef11 Mon Sep 17 00:00:00 2001 From: Tomoyuki SADAKANE <40597344+tsadakane@users.noreply.github.com> Date: Sun, 8 Jan 2023 20:30:06 +0900 Subject: [PATCH] Add CUDA implementation of FBP filtering * Modified and added Python files --- Python/setup.py | 22 ++++++- .../algorithms/single_pass_algorithms.py | 5 +- .../cuda_interface/_FbpFiltration.pyx | 57 +++++++++++++++++ Python/tigre/utilities/filtering.py | 61 +++++++++++-------- 4 files changed, 118 insertions(+), 27 deletions(-) create mode 100644 Python/tigre/utilities/cuda_interface/_FbpFiltration.pyx diff --git a/Python/setup.py b/Python/setup.py index d88f8f1f..a5b7c0c0 100644 --- a/Python/setup.py +++ b/Python/setup.py @@ -486,6 +486,26 @@ def include_headers(filename_list, sdist=False): ) +FbpFiltration_ext = Extension( + "_FbpFiltration", + sources=include_headers( + [ + "../Common/CUDA/TIGRE_common.cpp", + "../Common/CUDA/FbpFiltration.cu", + "../Common/CUDA/GpuIds.cpp", + "../Common/CUDA/gpuUtils.cu", + "tigre/utilities/cuda_interface/_FbpFiltration.pyx", + ], + sdist=sys.argv[1] == "sdist", + ), + define_macros=[("IS_FOR_PYTIGRE", None)], + library_dirs=[CUDA["lib64"]], + libraries=["cudart", "cufft"], + language="c++", + runtime_library_dirs=[CUDA["lib64"]] if not IS_WINDOWS else None, + include_dirs=[NUMPY_INCLUDE, CUDA["include"], "../Common/CUDA/"], +) + setup( name="pytigre", version="2.2.0", @@ -493,7 +513,7 @@ def include_headers(filename_list, sdist=False): packages=find_packages(), include_package_data=True, data_files=[("data", ["../Common/data/head.mat"])], - ext_modules=[Ax_ext, Atb_ext, tvdenoising_ext, minTV_ext, AwminTV_ext, gpuUtils_ext, RandomNumberGenerator_ext], + ext_modules=[Ax_ext, Atb_ext, tvdenoising_ext, minTV_ext, AwminTV_ext, gpuUtils_ext, RandomNumberGenerator_ext, FbpFiltration_ext], py_modules=["tigre.py"], cmdclass={"build_ext": BuildExtension}, install_requires=["Cython", "matplotlib", "numpy", "scipy", "tqdm"], diff --git a/Python/tigre/algorithms/single_pass_algorithms.py b/Python/tigre/algorithms/single_pass_algorithms.py index 3fac0621..b15030fe 100644 --- a/Python/tigre/algorithms/single_pass_algorithms.py +++ b/Python/tigre/algorithms/single_pass_algorithms.py @@ -73,6 +73,9 @@ def FDK(proj, geo, angles, **kwargs): verbose = kwargs["verbose"] if "verbose" in kwargs else False gpuids = kwargs["gpuids"] if "gpuids" in kwargs else None + + use_gpu_for_filtration = kwargs["use_gpu_for_filtration"] if "use_gpu_for_filtration" in kwargs else True + geo = copy.deepcopy(geo) geo.check_geo(angles) geo.checknans() @@ -88,7 +91,7 @@ def FDK(proj, geo, angles, **kwargs): w = geo.DSD[0] / np.sqrt((geo.DSD[0] ** 2 + xx ** 2 + yy ** 2)) np.multiply(proj, w, out=proj_filt) - proj_filt = filtering(proj_filt, geo, angles, parker=False, verbose=verbose) + proj_filt = filtering(proj_filt, geo, angles, parker=False, verbose=verbose, use_gpu=use_gpu_for_filtration, gpuids=gpuids) return Atb(proj_filt, geo, geo.angles, "FDK", gpuids=gpuids) diff --git a/Python/tigre/utilities/cuda_interface/_FbpFiltration.pyx b/Python/tigre/utilities/cuda_interface/_FbpFiltration.pyx new file mode 100644 index 00000000..6705b219 --- /dev/null +++ b/Python/tigre/utilities/cuda_interface/_FbpFiltration.pyx @@ -0,0 +1,57 @@ +# This file is part of the TIGRE Toolbox + +# Copyright (c) 2015, University of Bath and +# CERN-European Organization for Nuclear Research +# All rights reserved. + +# License: Open Source under BSD. +# See the full license at +# https://github.com/CERN/TIGRE/license.txt + +# Contact: tigre.toolbox@gmail.com +# Codes: https://github.com/CERN/TIGRE/ +# -------------------------------------------------------------------------- +# Coded by: Tomoyuki SADAKANE + +cimport numpy as np +import numpy as np +from tigre.utilities.errors import TigreCudaCallError +from tigre.utilities.cuda_interface._gpuUtils cimport GpuIds as c_GpuIds, convert_to_c_gpuids, free_c_gpuids + +np.import_array() + +from libc.stdlib cimport malloc, free + +cdef extern from "numpy/arrayobject.h": + void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) + void PyArray_CLEARFLAGS(np.ndarray arr, int flags) + +cdef extern from "FbpFiltration.hpp": + cdef void apply_filtration(const float* pf32In, size_t uiXLen, size_t uiYLen, float* pf32Filter, float* pfOut, const c_GpuIds gpuids) + +def cuda_raise_errors(error_code): + if error_code: + raise TigreCudaCallError('FFT::', error_code) + +def apply(np.ndarray[np.float32_t, ndim=2] src, np.ndarray[np.float32_t, ndim=1] flt, gpuids=None): + cdef c_GpuIds* c_gpuids = convert_to_c_gpuids(gpuids) + if not c_gpuids: + raise MemoryError() + + cdef np.npy_intp size_img[2] + size_img[0]= src.shape[0] + size_img[1]= src.shape[1] + + cdef float* c_imgout = malloc(size_img[0] *size_img[1]* sizeof(float)) + + src = np.ascontiguousarray(src) + flt = np.ascontiguousarray(flt) + + cdef float* c_src = src.data + cdef float* c_flt = flt.data + cdef size_t uiXLen = size_img[1] + cdef size_t uiYLen = size_img[0] + cuda_raise_errors(apply_filtration(c_src, uiXLen, uiYLen, c_flt, c_imgout, c_gpuids[0])) + imgout = np.PyArray_SimpleNewFromData(2, size_img, np.NPY_FLOAT32, c_imgout) + PyArray_ENABLEFLAGS(imgout, np.NPY_OWNDATA) + return imgout diff --git a/Python/tigre/utilities/filtering.py b/Python/tigre/utilities/filtering.py index f7aedc17..38256f30 100644 --- a/Python/tigre/utilities/filtering.py +++ b/Python/tigre/utilities/filtering.py @@ -9,10 +9,11 @@ import numpy as np from tigre.utilities.parkerweight import parkerweight +import _FbpFiltration as fbpfiltration # TODO: Fix parker -def filtering(proj, geo, angles, parker, verbose=False): +def filtering(proj, geo, angles, parker, verbose=False, use_gpu=True, gpuids=None): if parker: proj=parkerweight(proj.transpose(0,2,1),geo,angles,parker).transpose(0,2,1) @@ -21,34 +22,44 @@ def filtering(proj, geo, angles, parker, verbose=False): d=1 filt=filter(geo.filter,ramp_kernel[0],filt_len,d,verbose=verbose) - filt=np.kron(np.ones((np.int64(geo.nDetector[0]),1),dtype=np.float32),filt) padding = int((filt_len-geo.nDetector[1])//2 ) scale_factor = (geo.DSD[0]/geo.DSO[0]) * (2 * np.pi/ len(angles)) / ( 4 * geo.dDetector[0] ) + if use_gpu: + fproj=np.empty((geo.nDetector[0],filt_len),dtype=np.float32) + for idx_proj in range(0,angles.shape[0]): + fproj.fill(0) + fproj[:,padding:padding+geo.nDetector[1]]=proj[idx_proj] - #filter 2 projection at a time packing in to complex container - fproj=np.empty((geo.nDetector[0],filt_len),dtype=np.complex64) - for i in range(0,angles.shape[0]-1,2): - fproj.fill(0) - fproj.real[:,padding:padding+geo.nDetector[1]]=proj[i] - fproj.imag[:,padding:padding+geo.nDetector[1]]=proj[i+1] - - fproj=fft(fproj,axis=1) - fproj=fproj*filt - fproj=ifft(fproj,axis=1) - - proj[i]=fproj.real[:,padding:padding+geo.nDetector[1]] * scale_factor - proj[i+1]=fproj.imag[:,padding:padding+geo.nDetector[1]] * scale_factor - - #if odd number of projections filter last solo - if angles.shape[0] % 2: - fproj.fill(0) - fproj.real[:,padding:padding+geo.nDetector[1]]=proj[angles.shape[0]-1] - - fproj=fft(fproj,axis=1) - fproj=fproj*filt - fproj=np.real(ifft(fproj,axis=1)) - proj[angles.shape[0]-1]=fproj[:,padding:padding+geo.nDetector[1]] * scale_factor + fproj = fbpfiltration.apply(fproj, filt, gpuids) + + proj[idx_proj]=fproj[:,padding:padding+geo.nDetector[1]] * scale_factor + else: + filt=np.kron(np.ones((np.int64(geo.nDetector[0]),1),dtype=np.float32),filt) + + #filter 2 projection at a time packing in to complex container + fproj=np.empty((geo.nDetector[0],filt_len),dtype=np.complex64) + for i in range(0,angles.shape[0]-1,2): + fproj.fill(0) + fproj.real[:,padding:padding+geo.nDetector[1]]=proj[i] + fproj.imag[:,padding:padding+geo.nDetector[1]]=proj[i+1] + + fproj=fft(fproj,axis=1) + fproj=fproj*filt + fproj=ifft(fproj,axis=1) + + proj[i]=fproj.real[:,padding:padding+geo.nDetector[1]] * scale_factor + proj[i+1]=fproj.imag[:,padding:padding+geo.nDetector[1]] * scale_factor + + #if odd number of projections filter last solo + if angles.shape[0] % 2: + fproj.fill(0) + fproj.real[:,padding:padding+geo.nDetector[1]]=proj[angles.shape[0]-1] + + fproj=fft(fproj,axis=1) + fproj=fproj*filt + fproj=np.real(ifft(fproj,axis=1)) + proj[angles.shape[0]-1]=fproj[:,padding:padding+geo.nDetector[1]] * scale_factor return proj