From edfdf2c20e961c0e72774382db827787c67356d6 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Tue, 5 Feb 2019 19:33:10 +0300
Subject: [PATCH] Fixed bug in Tensor Cores training

---
 src/convolutional_kernels.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index b2fb6344..03f21609 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -139,7 +139,7 @@ __global__ void cuda_f32_to_f16(float* input_f32, size_t size, half *output_f16)
 }
 
 void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16) {
-    cuda_f32_to_f16 <<< get_number_of_blocks(size, BLOCK), BLOCK, 0, get_cuda_stream() >>> (input_f32, size, (half *)output_f16);
+    cuda_f32_to_f16 <<< size / BLOCK + 1, BLOCK, 0, get_cuda_stream() >>> (input_f32, size, (half *)output_f16);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -151,7 +151,7 @@ __global__ void cuda_f16_to_f32(half* input_f16, size_t size, float *output_f32)
 }
 
 void cuda_convert_f16_to_f32(float* input_f16, size_t size, float *output_f32) {
-    cuda_f16_to_f32 <<< get_number_of_blocks(size, BLOCK), BLOCK, 0, get_cuda_stream() >>> ((half *)input_f16, size, output_f32);
+    cuda_f16_to_f32 <<< size / BLOCK + 1, BLOCK, 0, get_cuda_stream() >>> ((half *)input_f16, size, output_f32);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 