diff --git a/MATLAB/Source/POCS_TV.cu b/MATLAB/Source/POCS_TV.cu index 4a3861ec..e34a589b 100644 --- a/MATLAB/Source/POCS_TV.cu +++ b/MATLAB/Source/POCS_TV.cu @@ -183,7 +183,7 @@ do { \ __syncthreads(); -#if (__CUDA_ARCH__ >= 300) +#if (CUDART_VERSION >= 9000) if ( tid < 32 ) { mySum = sdata[tid] + sdata[tid + 32]; @@ -231,7 +231,7 @@ do { \ __syncthreads(); -#if (__CUDA_ARCH__ >= 300) +#if (CUDART_VERSION >= 9000) if ( tid < 32 ) { mySum = sdata[tid] + sdata[tid + 32]; @@ -397,9 +397,11 @@ do { \ // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // splits>2 is completely empirical observation if (isHostRegisterSupported & splits>2){ cudaHostRegister(img ,image_size[2]*image_size[1]*image_size[0]*sizeof(float),cudaHostRegisterPortable); diff --git a/MATLAB/Source/POCS_TV2.cu b/MATLAB/Source/POCS_TV2.cu index 0f2e0b00..8eca93fc 100644 --- a/MATLAB/Source/POCS_TV2.cu +++ b/MATLAB/Source/POCS_TV2.cu @@ -204,7 +204,7 @@ do { \ __syncthreads(); -#if (__CUDA_ARCH__ >= 300) +#if (CUDART_VERSION >= 9000) if ( tid < 32 ) { mySum = sdata[tid] + sdata[tid + 32]; @@ -252,7 +252,7 @@ do { \ __syncthreads(); -#if (__CUDA_ARCH__ >= 300) +#if (CUDART_VERSION >= 9000) if ( tid < 32 ) { mySum = sdata[tid] + sdata[tid + 32]; @@ -417,9 +417,11 @@ void aw_pocs_tv(float* img,float* dst,float alpha,const long* image_size, int ma // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. 
+ int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // splits>2 is completely empirical observation if (isHostRegisterSupported & splits>2){ cudaHostRegister(img ,image_size[2]*image_size[1]*image_size[0]*sizeof(float),cudaHostRegisterPortable); diff --git a/MATLAB/Source/Siddon_projection.cu b/MATLAB/Source/Siddon_projection.cu index b74f2f54..60a798cc 100644 --- a/MATLAB/Source/Siddon_projection.cu +++ b/MATLAB/Source/Siddon_projection.cu @@ -349,9 +349,11 @@ int siddon_ray_projection(float * img, Geometry geo, float** result,float cons //Pagelock memory for synchronous copy. // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // empirical testing shows that when the image split is smaller than 1 (also implies the image is not very big), the time to // pin the memory is greater than the lost time in Synchronously launching the memcpys. This is only worth it when the image is too big. if (isHostRegisterSupported & (splits>1 |deviceCount>1)){ diff --git a/MATLAB/Source/ray_interpolated_projection.cu b/MATLAB/Source/ray_interpolated_projection.cu index f96b30e0..3fd2155c 100644 --- a/MATLAB/Source/ray_interpolated_projection.cu +++ b/MATLAB/Source/ray_interpolated_projection.cu @@ -290,9 +290,11 @@ int interpolation_projection(float * img, Geometry geo, float** result,float c //Pagelock memory for synchronous copy. // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. 
- int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // empirical testing shows that when the image split is smaller than 1 (also implies the image is not very big), the time to // pin the memory is greater than the lost time in Synchronously launching the memcpys. This is only worth it when the image is too big. if (isHostRegisterSupported & splits>1){ diff --git a/MATLAB/Source/tvdenoising.cu b/MATLAB/Source/tvdenoising.cu index 1c4d895e..6f446967 100644 --- a/MATLAB/Source/tvdenoising.cu +++ b/MATLAB/Source/tvdenoising.cu @@ -262,9 +262,11 @@ do { \ // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif if (isHostRegisterSupported & splits>1){ cudaHostRegister(src ,image_size[2]*image_size[1]*image_size[0]*sizeof(float),cudaHostRegisterPortable); cudaHostRegister(dst ,image_size[2]*image_size[1]*image_size[0]*sizeof(float),cudaHostRegisterPortable); diff --git a/MATLAB/Source/voxel_backprojection.cu b/MATLAB/Source/voxel_backprojection.cu index 03cdd8b5..99a3001f 100644 --- a/MATLAB/Source/voxel_backprojection.cu +++ b/MATLAB/Source/voxel_backprojection.cu @@ -311,9 +311,11 @@ int voxel_backprojection(float * projections, Geometry geo, float* result,floa cudaCheckErrors("Error"); //Pagelock memory for synchronous copy. // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. 
- int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // empirical testing shows that when the image split is smaller than 1 (also implies the image is not very big), the time to // pin the memory is greater than the lost time in Synchronously launching the memcpys. This is only worth it when the image is too big. if (isHostRegisterSupported & (split_image>1 |deviceCount>1)){ diff --git a/MATLAB/Source/voxel_backprojection2.cu b/MATLAB/Source/voxel_backprojection2.cu index 128c6b04..de920137 100644 --- a/MATLAB/Source/voxel_backprojection2.cu +++ b/MATLAB/Source/voxel_backprojection2.cu @@ -352,9 +352,11 @@ int voxel_backprojection2(float * projections, Geometry geo, float* result,float //Pagelock memory for synchronous copy. // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // empirical testing shows that when the image split is smaller than 1 (also implies the image is not very big), the time to // pin the memory is greater than the lost time in Synchronously launching the memcpys. This is only worth it when the image is too big. 
if (isHostRegisterSupported & split_image>1){ diff --git a/MATLAB/Source/voxel_backprojection_parallel.cu b/MATLAB/Source/voxel_backprojection_parallel.cu index 08982256..98faab16 100644 --- a/MATLAB/Source/voxel_backprojection_parallel.cu +++ b/MATLAB/Source/voxel_backprojection_parallel.cu @@ -298,9 +298,11 @@ int voxel_backprojection_parallel(float * projections, Geometry geo, float* re } //Pagelock memory for synchronous copy. // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif if (isHostRegisterSupported){ cudaHostRegister(projections, (size_t)geo.nDetecU*(size_t)geo.nDetecV*(size_t)nalpha*(size_t)sizeof(float),cudaHostRegisterPortable); } diff --git a/Python/setup.py b/Python/setup.py index 261d96f2..714b2bd7 100644 --- a/Python/setup.py +++ b/Python/setup.py @@ -16,7 +16,7 @@ # Code from https://github.com/pytorch/pytorch/blob/master/torch/utils/cpp_extension.py COMPUTE_CAPABILITY_ARGS = [ # '-gencode=arch=compute_20,code=sm_20', #deprecated - #'-gencode=arch=compute_30,code=sm_30',#deprecated + '-gencode=arch=compute_30,code=sm_30', '-gencode=arch=compute_37,code=sm_37', '-gencode=arch=compute_52,code=sm_52', '-gencode=arch=compute_60,code=sm_60', diff --git a/Python/tigre/Source/POCS_TV.cu b/Python/tigre/Source/POCS_TV.cu index 4a3861ec..e34a589b 100644 --- a/Python/tigre/Source/POCS_TV.cu +++ b/Python/tigre/Source/POCS_TV.cu @@ -183,7 +183,7 @@ do { \ __syncthreads(); -#if (__CUDA_ARCH__ >= 300) +#if (CUDART_VERSION >= 9000) if ( tid < 32 ) { mySum = sdata[tid] + sdata[tid + 32]; @@ -231,7 +231,7 @@ do { \ __syncthreads(); -#if (__CUDA_ARCH__ >= 300) +#if (CUDART_VERSION >= 9000) 
if ( tid < 32 ) { mySum = sdata[tid] + sdata[tid + 32]; @@ -397,9 +397,11 @@ do { \ // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // splits>2 is completely empirical observation if (isHostRegisterSupported & splits>2){ cudaHostRegister(img ,image_size[2]*image_size[1]*image_size[0]*sizeof(float),cudaHostRegisterPortable); diff --git a/Python/tigre/Source/POCS_TV2.cu b/Python/tigre/Source/POCS_TV2.cu index 0f2e0b00..8eca93fc 100644 --- a/Python/tigre/Source/POCS_TV2.cu +++ b/Python/tigre/Source/POCS_TV2.cu @@ -204,7 +204,7 @@ do { \ __syncthreads(); -#if (__CUDA_ARCH__ >= 300) +#if (CUDART_VERSION >= 9000) if ( tid < 32 ) { mySum = sdata[tid] + sdata[tid + 32]; @@ -252,7 +252,7 @@ do { \ __syncthreads(); -#if (__CUDA_ARCH__ >= 300) +#if (CUDART_VERSION >= 9000) if ( tid < 32 ) { mySum = sdata[tid] + sdata[tid + 32]; @@ -417,9 +417,11 @@ void aw_pocs_tv(float* img,float* dst,float alpha,const long* image_size, int ma // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. 
+ int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // splits>2 is completely empirical observation if (isHostRegisterSupported & splits>2){ cudaHostRegister(img ,image_size[2]*image_size[1]*image_size[0]*sizeof(float),cudaHostRegisterPortable); diff --git a/Python/tigre/Source/Siddon_projection.cu b/Python/tigre/Source/Siddon_projection.cu index 04896ad2..d611d0d1 100644 --- a/Python/tigre/Source/Siddon_projection.cu +++ b/Python/tigre/Source/Siddon_projection.cu @@ -353,9 +353,11 @@ int siddon_ray_projection(float * img, Geometry geo, float** result,float cons //Pagelock memory for synchronous copy. // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // empirical testing shows that when the image split is smaller than 1 (also implies the image is not very big), the time to // pin the memory is greater than the lost time in Synchronously launching the memcpys. This is only worth it when the image is too big. if (isHostRegisterSupported & (splits>1 |deviceCount>1)){ diff --git a/Python/tigre/Source/ray_interpolated_projection.cu b/Python/tigre/Source/ray_interpolated_projection.cu index 2df2b6e0..0a0e64b4 100644 --- a/Python/tigre/Source/ray_interpolated_projection.cu +++ b/Python/tigre/Source/ray_interpolated_projection.cu @@ -295,9 +295,11 @@ int interpolation_projection(float * img, Geometry geo, float** result,float c //Pagelock memory for synchronous copy. 
// Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // empirical testing shows that when the image split is smaller than 1 (also implies the image is not very big), the time to // pin the memory is greater than the lost time in Synchronously launching the memcpys. This is only worth it when the image is too big. if (isHostRegisterSupported & splits>1){ diff --git a/Python/tigre/Source/tvdenoising.cu b/Python/tigre/Source/tvdenoising.cu index 1c4d895e..6f446967 100644 --- a/Python/tigre/Source/tvdenoising.cu +++ b/Python/tigre/Source/tvdenoising.cu @@ -262,9 +262,11 @@ do { \ // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. 
+ int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif if (isHostRegisterSupported & splits>1){ cudaHostRegister(src ,image_size[2]*image_size[1]*image_size[0]*sizeof(float),cudaHostRegisterPortable); cudaHostRegister(dst ,image_size[2]*image_size[1]*image_size[0]*sizeof(float),cudaHostRegisterPortable); diff --git a/Python/tigre/Source/voxel_backprojection.cu b/Python/tigre/Source/voxel_backprojection.cu index 60b4bb9c..4ab87021 100644 --- a/Python/tigre/Source/voxel_backprojection.cu +++ b/Python/tigre/Source/voxel_backprojection.cu @@ -317,9 +317,11 @@ int voxel_backprojection(float * projections, Geometry geo, float* result,floa cudaCheckErrors("Error"); //Pagelock memory for synchronous copy. // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif // empirical testing shows that when the image split is smaller than 1 (also implies the image is not very big), the time to // pin the memory is greater than the lost time in Synchronously launching the memcpys. This is only worth it when the image is too big. if (isHostRegisterSupported & (split_image>1 |deviceCount>1)){ diff --git a/Python/tigre/Source/voxel_backprojection2.cu b/Python/tigre/Source/voxel_backprojection2.cu index 204051bf..3bc6b3e7 100644 --- a/Python/tigre/Source/voxel_backprojection2.cu +++ b/Python/tigre/Source/voxel_backprojection2.cu @@ -356,9 +356,12 @@ int voxel_backprojection2(float * projections, Geometry geo, float* result,float //Pagelock memory for synchronous copy. 
// Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif + // empirical testing shows that when the image split is smaller than 1 (also implies the image is not very big), the time to // pin the memory is greater than the lost time in Synchronously launching the memcpys. This is only worth it when the image is too big. if (isHostRegisterSupported & split_image>1){ diff --git a/Python/tigre/Source/voxel_backprojection_parallel.cu b/Python/tigre/Source/voxel_backprojection_parallel.cu index c48fb75f..e05dd376 100644 --- a/Python/tigre/Source/voxel_backprojection_parallel.cu +++ b/Python/tigre/Source/voxel_backprojection_parallel.cu @@ -302,9 +302,11 @@ int voxel_backprojection_parallel(float * projections, Geometry geo, float* re } //Pagelock memory for synchronous copy. // Lets try to make the host memory pinned: - // We laredy queried the GPU and assuemd they are the same, thus should have the same attributes. - int isHostRegisterSupported; + // We already queried the GPU and assumed they are the same, thus should have the same attributes. + int isHostRegisterSupported = 0; +#if CUDART_VERSION >= 9020 cudaDeviceGetAttribute(&isHostRegisterSupported,cudaDevAttrHostRegisterSupported,0); +#endif if (isHostRegisterSupported){ cudaHostRegister(projections, (size_t)geo.nDetecU*(size_t)geo.nDetecV*(size_t)nalpha*(size_t)sizeof(float),cudaHostRegisterPortable); }