Merge d512dd7391 into f6afaabcdf

2022-10-08 04:05:23 -04:00 · 2022-10-08 04:05:23 -04:00 · 02976ae6be
parent f6afaabcdf d512dd7391
commit 02976ae6be
2 changed files with 24 additions and 0 deletions
--- a/src/batchnorm_layer.c
+++ b/src/batchnorm_layer.c
@@ -228,9 +228,28 @@ void forward_batchnorm_layer_gpu(layer l, network net)
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
 #endif
    } else {
+#ifdef CUDNN
+        float one = 1;
+        float zero = 0;
+        cudnnBatchNormalizationForwardInference(cudnn_handle(),
+            CUDNN_BATCHNORM_SPATIAL,
+            &one,
+            &zero,
+            l.dstTensorDesc,
+            l.x_gpu,
+            l.dstTensorDesc,
+            l.output_gpu,
+            l.normTensorDesc,
+            l.scales_gpu,
+            l.biases_gpu,
+            l.rolling_mean_gpu,
+            l.rolling_variance_gpu,
+            .00001);
+#else
        normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
+#endif
    }

 }
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -122,6 +122,11 @@ static size_t get_workspace_size(layer l){
 #ifdef CUDNN
 void cudnn_convolutional_setup(layer *l)
 {
+#if(CUDNN_MAJOR >= 7)
+    // Note: The library falls back to the default math mode CUDNN_DEFAULT_MATH when Tensor Core operations are not supported or not permitted.
+    cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH);
+#endif
+    
    cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); 
    cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);