Mirror of https://github.com/pjreddie/darknet.git (synced 2023-08-10 21:13:14 +03:00)
Fixed weights size for depthwise (grouped) convolutional layers
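For context (not part of the commit): in darknet, a convolutional layer's l.nweights already accounts for l.groups, so for grouped/depthwise layers it is smaller than the plain product l.size*l.size*l.c*l.n that these call sites were using. Below is a minimal sketch of that difference with illustrative numbers; the formula mirrors how darknet sizes the weight buffer, but treat the details as an assumption rather than repository code.

/* Illustrative only, not repository code: why l.nweights (which divides the
 * input channels by l.groups) differs from the ungrouped product
 * l.size*l.size*l.c*l.n that this commit replaces. */
#include <stddef.h>
#include <stdio.h>

static size_t conv_nweights(int c, int n, int size, int groups)
{
    /* each filter sees only the channels of its own group */
    return (size_t)(c / groups) * (size_t)n * (size_t)size * (size_t)size;
}

int main(void)
{
    /* 3x3 depthwise layer: groups == c == n == 32 */
    printf("nweights (grouped)        = %zu\n", conv_nweights(32, 32, 3, 32)); /* 288  */
    printf("size*size*c*n (ungrouped) = %d\n", 3 * 3 * 32 * 32);               /* 9216 */
    return 0;
}

With the old expression, every call in this diff would traverse l.groups times more elements than were actually allocated for the weight buffers.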
@@ -518,7 +518,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
             printf("\n is_nan_or_inf(state.input) = %d \n", input_nan_inf);
             if (input_nan_inf) getchar();
 
-            int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.size * l.size * l.c * l.n);
+            int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.nweights);
             printf("\n is_nan_or_inf(l.weights_gpu) = %d \n", weights_nan_inf);
             if (weights_nan_inf) getchar();
         */
@@ -697,8 +697,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
         // calculate conv weight updates
         // Already: l.weight_updates_gpu = (l.weight_updates_gpu - l.weight*decay*batch*subdivision)*momentum
         // so we should copy f32 to f16, or compute: f16=(w_up - w*d*b*s)*m
-        assert((l.c*l.n*l.size*l.size) > 0);
-        cuda_convert_f32_to_f16(l.weight_updates_gpu, l.c*l.n*l.size*l.size, l.weight_updates_gpu16);
+        assert((l.nweights) > 0);
+        cuda_convert_f32_to_f16(l.weight_updates_gpu, l.nweights, l.weight_updates_gpu16);
 
         CHECK_CUDNN(cudnnConvolutionBackwardFilter(cudnn_handle(),
             &one,
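The comments in the hunk above spell out the mixed-precision step: the accumulated weight updates, already of the form l.weight_updates_gpu = (l.weight_updates_gpu - l.weight*decay*batch*subdivision)*momentum, are converted from f32 to f16 before cudnnConvolutionBackwardFilter runs in half precision. A hedged CPU-side sketch of that element-wise expression over l.nweights entries follows; variable names are illustrative, not darknet's.

/* Hedged sketch (not darknet code): the element-wise expression quoted in the
 * comment above, f16 = (w_up - w*d*b*s)*m, written out over the l.nweights
 * entries of the update buffer. The commit only changes the element count:
 * l.nweights rather than l.size*l.size*l.c*l.n. */
#include <stddef.h>

static void scale_weight_updates(float *weight_updates, const float *weights,
                                 size_t nweights, float decay, int batch,
                                 int subdivisions, float momentum)
{
    for (size_t i = 0; i < nweights; ++i) {
        weight_updates[i] = (weight_updates[i]
                             - weights[i] * decay * batch * subdivisions) * momentum;
    }
}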
@@ -714,7 +714,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
             l.dweightDesc16,
             l.weight_updates_gpu16));   // l.weight_updates_gpu);
 
-        cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.c*l.n*l.size*l.size, l.weight_updates_gpu);
+        cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.nweights, l.weight_updates_gpu);
 
         if (state.delta) {
             if (l.binary || l.xnor) swap_binary(&l);
@@ -856,7 +856,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
         if (state.delta) {
            fix_nan_and_inf(state.delta, l.inputs * l.batch);
         }
-        int size = l.size * l.size * l.c * l.n;
+        int size = l.nweights;
         fix_nan_and_inf(l.weight_updates_gpu, size);
         fix_nan_and_inf(l.weights_gpu, size);
     }
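fix_nan_and_inf scrubs non-finite values from a GPU buffer; the change above only affects how many elements it is told to scrub, which now matches the allocation for grouped/depthwise layers. A CPU analogue of the idea, as a sketch rather than darknet's actual kernel:

/* Hedged CPU analogue (not darknet's fix_nan_and_inf, which operates on the
 * GPU buffer): replace non-finite entries in a buffer of n floats. In the hunk
 * above, n becomes l.nweights, the true size of l.weights_gpu and
 * l.weight_updates_gpu. */
#include <math.h>
#include <stddef.h>

static void scrub_nan_and_inf(float *x, size_t n)
{
    for (size_t i = 0; i < n; ++i) {
        if (!isfinite(x[i])) x[i] = 0.0f;   /* replacement value is illustrative */
    }
}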
|
@@ -1001,7 +1001,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
             //size_t bit_input_size = intput_size / 8 + 1;
             //char *bit_input = calloc(bit_input_size, sizeof(char));
 
-            //size_t weights_size = k * m; //l.size*l.size*l.c*l.n;
+            //size_t weights_size = k * m; //l.size*l.size*l.c*l.n; // l.nweights
             //size_t bit_weights_size = weights_size / 8 + 1;
 
             //char *bit_weights = calloc(bit_weights_size, sizeof(char));
@@ -1136,7 +1136,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
 
 void update_convolutional_layer(convolutional_layer l, int batch, float learning_rate, float momentum, float decay)
 {
-    //int size = l.size*l.size*l.c*l.n;
+    //int size = l.nweights;
     axpy_cpu(l.n, learning_rate / batch, l.bias_updates, 1, l.biases, 1);
     scal_cpu(l.n, momentum, l.bias_updates, 1);
 
|
@@ -162,8 +162,8 @@ void forward_backward_network_gpu(network net, float *x, float *y)
         layer l = net.layers[i];
         if (net.cudnn_half){
             if (l.type == CONVOLUTIONAL && l.weights_gpu && l.weights_gpu16) {
-                assert((l.c*l.n*l.size*l.size) > 0);
-                cuda_convert_f32_to_f16(l.weights_gpu, l.c*l.n*l.size*l.size, l.weights_gpu16);
+                assert((l.nweights) > 0);
+                cuda_convert_f32_to_f16(l.weights_gpu, l.nweights, l.weights_gpu16);
             }
             else if (l.type == CRNN && l.input_layer->weights_gpu && l.input_layer->weights_gpu16) {
                 assert((l.input_layer->c*l.input_layer->n*l.input_layer->size*l.input_layer->size) > 0);
|