Use half_float16 instead of float32 if both CUDNN and CUDNN_HALF are defined. Use Tensor Cores.

2023-08-10 21:13:14 +03:00 · 2018-02-22 22:54:40 +03:00
parent 033e934ce8
commit dda993f3dd
3 changed files with 117 additions and 11 deletions
--- a/src/layer.h
+++ b/src/layer.h
@@ -242,6 +242,8 @@ struct layer{
    float * weights_gpu;
    float * weight_updates_gpu;

+	float * weights_gpu16;
+
    float * biases_gpu;
    float * bias_updates_gpu;