Added support for Tensor Cores, CC >= 7.0 (V100). For FP16/32 (mixed precision), the CUDNN_HALF define should be used.

2023-08-10 21:13:14 +03:00 · 2018-02-25 16:29:44 +03:00
parent 85eafd3d59
commit cad4d1618f
8 changed files with 123 additions and 18 deletions
--- a/src/layer.h
+++ b/src/layer.h
@@ -243,6 +243,7 @@ struct layer{
    float * weight_updates_gpu;

 	float * weights_gpu16;
+	float * weight_updates_gpu16;

    float * biases_gpu;
    float * bias_updates_gpu;