From e8c3905e547f8fb7f0d3d0f297569453461b5b8f Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 28 Feb 2019 20:38:13 +0300 Subject: [PATCH] Functions for fixing nan --- src/blas.h | 3 +++ src/blas_kernels.cu | 47 ++++++++++++++++++++++++++++++++++++++++++ src/cuda.c | 8 +++++-- src/network_kernels.cu | 12 +++++++++++ 4 files changed, 68 insertions(+), 2 deletions(-) diff --git a/src/blas.h b/src/blas.h index 19b72b76..12d4b9a3 100644 --- a/src/blas.h +++ b/src/blas.h @@ -102,6 +102,9 @@ void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int for void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void fix_nan_and_inf(float *input, size_t size); +int is_nan_or_inf(float *input, size_t size); + #endif #ifdef __cplusplus } diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 45ed09cd..98592c81 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -975,3 +975,50 @@ extern "C" void softmax_tree_gpu(float *input, int spatial, int batch, int strid cuda_free((float *)tree_groups_size); cuda_free((float *)tree_groups_offset); } + + +__global__ void fix_nan_and_inf_kernel(float *input, size_t size) +{ + const int index = blockIdx.x*blockDim.x + threadIdx.x; + if (index < size) { + float val = input[index]; + if (isnan(val) || isinf(val)) + input[index] = index; // pseudo random value + } +} + +extern "C" void fix_nan_and_inf(float *input, size_t size) +{ + const int block_size = BLOCK; + const int num_blocks = get_number_of_blocks(size, block_size); + fix_nan_and_inf_kernel << > >(input, size); + CHECK_CUDA(cudaPeekAtLastError()); + //CHECK_CUDA(cudaDeviceSynchronize()); +} + + +__global__ void is_nan_or_inf_kernel(float *input, size_t size, int *pinned_return) +{ + const int index = blockIdx.x*blockDim.x + threadIdx.x; + if (index < size) { + float val = input[index]; + if (isnan(val) || isinf(val)) + *pinned_return = 1; + } +} + +extern "C" int is_nan_or_inf(float *input, size_t size) +{ + int *pinned_return; + CHECK_CUDA(cudaHostAlloc(&pinned_return, sizeof(int), cudaHostRegisterMapped)); + *pinned_return = 0; + + const int block_size = BLOCK; + const int num_blocks = get_number_of_blocks(size, block_size); + is_nan_or_inf_kernel << > >(input, size, pinned_return); + CHECK_CUDA(cudaDeviceSynchronize()); + int ret_val = *pinned_return; + + CHECK_CUDA(cudaFreeHost(pinned_return)); + return ret_val; +} \ No newline at end of file diff --git a/src/cuda.c b/src/cuda.c index 87402e5a..9e7745e2 100644 --- a/src/cuda.c +++ b/src/cuda.c @@ -59,8 +59,10 @@ void check_error(cudaError_t status) void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time) { - if (status != cudaSuccess) + if (status != cudaSuccess) { printf("CUDA status Error: file: %s() : line: %d : build time: %s \n", file, line, date_time); + check_error(status); + } #ifdef DEBUG status = cudaDeviceSynchronize(); if (status != cudaSuccess) @@ -175,8 +177,10 @@ void cudnn_check_error(cudnnStatus_t status) void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time) { - if (status != CUDNN_STATUS_SUCCESS) + if (status != CUDNN_STATUS_SUCCESS) { printf("\n cuDNN status Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); + cudnn_check_error(status); + } #ifdef DEBUG status = cudaDeviceSynchronize(); if (status != CUDNN_STATUS_SUCCESS) diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 2c016697..619b8742 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -110,6 +110,18 @@ void backward_network_gpu(network net, network_state state) state.delta = prev.delta_gpu; } l.backward_gpu(l, state); + + /* + if(i != 0) + { + layer l = net.layers[i - 1]; + int state_delta_nan_inf = is_nan_or_inf(state.delta, l.outputs * l.batch); + int state_input_nan_inf = is_nan_or_inf(state.input, l.outputs * l.batch); + printf("\n i - %d is_nan_or_inf(s.delta) = %d \n", i, state_delta_nan_inf); + printf(" i - %d is_nan_or_inf(s.input) = %d \n", i, state_input_nan_inf); + if (state_delta_nan_inf || state_input_nan_inf) { printf(" found "); getchar(); } + } + */ } }