diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index ffc9583d..ab8d7fa8 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -518,7 +518,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) printf("\n is_nan_or_inf(state.input) = %d \n", input_nan_inf); if (input_nan_inf) getchar(); - int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.size * l.size * l.c * l.n); + int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.nweights); printf("\n is_nan_or_inf(l.weights_gpu) = %d \n", weights_nan_inf); if (weights_nan_inf) getchar(); */ @@ -697,8 +697,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state // calculate conv weight updates // Already: l.weight_updates_gpu = (l.weight_updates_gpu - l.weight*decay*batch*subdivision)*momentum // so we should copy f32 to f16, or compute: f16=(w_up - w*d*b*s)*m - assert((l.c*l.n*l.size*l.size) > 0); - cuda_convert_f32_to_f16(l.weight_updates_gpu, l.c*l.n*l.size*l.size, l.weight_updates_gpu16); + assert((l.nweights) > 0); + cuda_convert_f32_to_f16(l.weight_updates_gpu, l.nweights, l.weight_updates_gpu16); CHECK_CUDNN(cudnnConvolutionBackwardFilter(cudnn_handle(), &one, @@ -714,7 +714,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state l.dweightDesc16, l.weight_updates_gpu16)); // l.weight_updates_gpu); - cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.c*l.n*l.size*l.size, l.weight_updates_gpu); + cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.nweights, l.weight_updates_gpu); if (state.delta) { if (l.binary || l.xnor) swap_binary(&l); @@ -856,7 +856,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state if (state.delta) { fix_nan_and_inf(state.delta, l.inputs * l.batch); } - int size = l.size * l.size * l.c * l.n; + int size = l.nweights; fix_nan_and_inf(l.weight_updates_gpu, size); fix_nan_and_inf(l.weights_gpu, size); } diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index c7f51a66..bc27672d 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -1001,7 +1001,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) //size_t bit_input_size = intput_size / 8 + 1; //char *bit_input = calloc(bit_input_size, sizeof(char)); - //size_t weights_size = k * m; //l.size*l.size*l.c*l.n; + //size_t weights_size = k * m; //l.size*l.size*l.c*l.n; // l.nweights //size_t bit_weights_size = weights_size / 8 + 1; //char *bit_weights = calloc(bit_weights_size, sizeof(char)); @@ -1136,7 +1136,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state) void update_convolutional_layer(convolutional_layer l, int batch, float learning_rate, float momentum, float decay) { - //int size = l.size*l.size*l.c*l.n; + //int size = l.nweights; axpy_cpu(l.n, learning_rate / batch, l.bias_updates, 1, l.biases, 1); scal_cpu(l.n, momentum, l.bias_updates, 1); diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 40f71eb0..edaefc34 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -162,8 +162,8 @@ void forward_backward_network_gpu(network net, float *x, float *y) layer l = net.layers[i]; if (net.cudnn_half){ if (l.type == CONVOLUTIONAL && l.weights_gpu && l.weights_gpu16) { - assert((l.c*l.n*l.size*l.size) > 0); - cuda_convert_f32_to_f16(l.weights_gpu, l.c*l.n*l.size*l.size, l.weights_gpu16); + assert((l.nweights) > 0); + cuda_convert_f32_to_f16(l.weights_gpu, l.nweights, l.weights_gpu16); } else if (l.type == CRNN && l.input_layer->weights_gpu && l.input_layer->weights_gpu16) { assert((l.input_layer->c*l.input_layer->n*l.input_layer->size*l.input_layer->size) > 0);