Improve training performance - batch-norm using cuDNN.

AlexeyAB
2018-03-20 02:16:51 +03:00
parent 2f52cfeb07
commit 537d135feb
12 changed files with 193 additions and 42 deletions
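Only a few hunks from the convolutional GPU kernels are excerpted below; the cuDNN batch-norm call itself lands inside forward_batchnorm_layer_gpu, which this commit also changes but which is not shown in this excerpt. For orientation, a cuDNN-backed batch-norm forward pass typically reduces to a single cudnnBatchNormalizationForwardTraining call. A minimal sketch under that assumption (hypothetical helper name and hyperparameters, not the commit's exact code):

#include <cudnn.h>

/* Sketch only: how a cuDNN batch-norm forward pass is typically wired up.
   Descriptor caching and error checking are omitted; the function name,
   the averaging factor 0.01, and epsilon 0.00001 are assumptions. */
void forward_batchnorm_cudnn_sketch(cudnnHandle_t handle,
                                    float *x_gpu, float *y_gpu,
                                    float *scales_gpu, float *biases_gpu,
                                    float *rolling_mean_gpu, float *rolling_variance_gpu,
                                    float *saved_mean_gpu, float *saved_variance_gpu,
                                    int batch, int c, int h, int w)
{
    float one = 1.f, zero = 0.f;

    cudnnTensorDescriptor_t x_desc, bn_desc;
    cudnnCreateTensorDescriptor(&x_desc);
    cudnnCreateTensorDescriptor(&bn_desc);
    cudnnSetTensor4dDescriptor(x_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                               batch, c, h, w);
    /* Derive the 1 x c x 1 x 1 descriptor for the per-channel parameters. */
    cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, CUDNN_BATCHNORM_SPATIAL);

    /* One call normalizes with the batch statistics, applies scale (gamma)
       and shift (beta), updates the rolling statistics used at inference,
       and caches the batch mean/inv-variance for the backward pass. */
    cudnnBatchNormalizationForwardTraining(
        handle, CUDNN_BATCHNORM_SPATIAL,
        &one, &zero,
        x_desc, x_gpu,
        x_desc, y_gpu,
        bn_desc, scales_gpu, biases_gpu,
        0.01,
        rolling_mean_gpu, rolling_variance_gpu,
        0.00001,
        saved_mean_gpu, saved_variance_gpu);

    cudnnDestroyTensorDescriptor(x_desc);
    cudnnDestroyTensorDescriptor(bn_desc);
}

Because the shift (bnBias) is applied inside this one call, a separate bias add after batch norm becomes redundant, which is what the forward_convolutional_layer_gpu hunk below accounts for.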


@@ -37,7 +37,7 @@ __global__ void binarize_input_kernel(float *input, int n, int size, float *binary)
     int i = 0;
     float mean = 0;
     for(i = 0; i < n; ++i){
-        mean += abs(input[i*size + s]);
+        mean += fabs(input[i*size + s]);
     }
     mean = mean / n;
     for(i = 0; i < n; ++i){
@@ -59,7 +59,7 @@ __global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary)
     int i = 0;
     float mean = 0;
     for(i = 0; i < size; ++i){
-        mean += abs(weights[f*size + i]);
+        mean += fabs(weights[f*size + i]);
     }
     mean = mean / size;
     for(i = 0; i < size; ++i){
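Background on the abs to fabs change in both binarize kernels above: in C, abs() is the integer absolute value from <stdlib.h>, so a float argument is first converted to int and the fractional part is discarded (device code can hit the same integer overload depending on header resolution). For inputs and weights in (-1, 1) that makes every term 0 and collapses the computed mean. fabs() from <math.h> operates on the floating-point value. A small standalone C illustration (not from the repository; the explicit cast just makes the implicit truncation visible):

#include <stdio.h>
#include <stdlib.h>   /* int abs(int)        */
#include <math.h>     /* double fabs(double) */

int main(void)
{
    float w = -0.75f;

    /* abs() takes an int: -0.75 truncates to 0 before the call,
       so the "absolute value" comes back as 0. */
    printf("abs:  %d\n", abs((int)w));

    /* fabs() keeps the fractional part, as the kernels need. */
    printf("fabs: %f\n", fabs(w));    /* prints 0.750000 */

    return 0;
}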
@@ -205,8 +205,10 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
     if (l.batch_normalize) {
         forward_batchnorm_layer_gpu(l, state);
-    }
-    add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
+    }
+    else {
+        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
+    }
 
     activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
     //if(l.dot > 0) dot_error_gpu(l);
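Why add_bias_gpu moves into an else branch: batch normalization already ends with a learned per-channel shift,

    y = gamma * (x - mean) / sqrt(variance + epsilon) + beta

and in darknet l.biases_gpu serves as that beta. With the cuDNN path the shift is applied inside forward_batchnorm_layer_gpu (cuDNN's bnBias argument), so adding the biases again afterwards would shift the output twice; the plain bias add is now reserved for layers without batch norm.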