Added support for Tensor Cores on GPUs with CC >= 7.0 (V100). For FP16/FP32 mixed precision, CUDNN_HALF should be defined.

2023-08-10 21:13:14 +03:00 · 2018-02-25 16:29:44 +03:00
parent 85eafd3d59
commit cad4d1618f
8 changed files with 123 additions and 18 deletions
--- a/src/network.c
+++ b/src/network.c
@@ -316,6 +316,8 @@ void set_batch_network(network *net, int b)
        net->layers[i].batch = b;
 #ifdef CUDNN
        if(net->layers[i].type == CONVOLUTIONAL){
+			cudnn_convolutional_setup(net->layers + i, cudnn_fastest);
+			/*
 			layer *l = net->layers + i;
            cudnn_convolutional_setup(l, cudnn_fastest);
 			// check for excessive memory consumption 
@@ -327,6 +329,7 @@ void set_batch_network(network *net, int b)
 				cudnn_convolutional_setup(l, cudnn_smallest);
 				l->workspace_size = get_workspace_size(*l);
 			}
+			*/
        }
 #endif
    }