Added support for Tensor Cores on GPUs with compute capability (CC) >= 7.0 (e.g. V100). For FP16/FP32 (mixed precision), the CUDNN_HALF define should be used.

2023-08-10 21:13:14 +03:00 · 2018-02-25 16:29:44 +03:00
parent 85eafd3d59
commit cad4d1618f
8 changed files with 123 additions and 18 deletions
--- a/src/layer.c
+++ b/src/layer.c
@@ -83,6 +83,8 @@ void free_layer(layer l)
 	if (l.x_norm_gpu)              cuda_free(l.x_norm_gpu);
 	if (l.weights_gpu)             cuda_free(l.weights_gpu);
 	if (l.weight_updates_gpu)      cuda_free(l.weight_updates_gpu);
+	if (l.weights_gpu16)           cuda_free(l.weights_gpu16);
+	if (l.weight_updates_gpu16)    cuda_free(l.weight_updates_gpu16);
 	if (l.biases_gpu)              cuda_free(l.biases_gpu);
 	if (l.bias_updates_gpu)        cuda_free(l.bias_updates_gpu);
 	if (l.scales_gpu)              cuda_free(l.scales_gpu);