Improve training performance - batch-norm using cuDNN.

2023-08-10 21:13:14 +03:00 · 2018-03-20 02:16:51 +03:00
parent 2f52cfeb07
commit 537d135feb
12 changed files with 193 additions and 42 deletions
--- a/src/cuda.h
+++ b/src/cuda.h
@@ -19,19 +19,25 @@ extern int gpu_index;
 #include "cudnn.h"
 #endif

-void check_error(cudaError_t status);
-cublasHandle_t blas_handle();
-float *cuda_make_array(float *x, size_t n);
-int *cuda_make_int_array(size_t n);
-void cuda_push_array(float *x_gpu, float *x, size_t n);
-void cuda_pull_array(float *x_gpu, float *x, size_t n);
-void cuda_set_device(int n);
-int cuda_get_device();
-void cuda_free(float *x_gpu);
-void cuda_random(float *x_gpu, size_t n);
-float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
-dim3 cuda_gridsize(size_t n);
-cudaStream_t get_cuda_stream();
+#ifdef __cplusplus
+extern "C" {
+#endif
+	void check_error(cudaError_t status);
+	cublasHandle_t blas_handle();
+	float *cuda_make_array(float *x, size_t n);
+	int *cuda_make_int_array(size_t n);
+	void cuda_push_array(float *x_gpu, float *x, size_t n);
+	void cuda_pull_array(float *x_gpu, float *x, size_t n);
+	void cuda_set_device(int n);
+	int cuda_get_device();
+	void cuda_free(float *x_gpu);
+	void cuda_random(float *x_gpu, size_t n);
+	float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
+	dim3 cuda_gridsize(size_t n);
+	cudaStream_t get_cuda_stream();
+#ifdef __cplusplus
+}
+#endif

 #ifdef CUDNN
 cudnnHandle_t cudnn_handle();