mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Fixed CRNN (RNN based on Convolution) layer
This commit is contained in:
@ -67,6 +67,8 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
|
||||
l.size = 1;
|
||||
l.stride = 1;
|
||||
l.pad = 0;
|
||||
l.activation = activation;
|
||||
l.learning_rate_scale = 1;
|
||||
|
||||
l.output = calloc(total_batch*outputs, sizeof(float));
|
||||
l.delta = calloc(total_batch*outputs, sizeof(float));
|
||||
@ -145,7 +147,6 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
|
||||
l.workspace_size = get_connected_workspace_size(l);
|
||||
#endif // CUDNN
|
||||
#endif // GPU
|
||||
l.activation = activation;
|
||||
fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs);
|
||||
return l;
|
||||
}
|
||||
|
@ -333,6 +333,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
l.size = size;
|
||||
l.pad = padding;
|
||||
l.batch_normalize = batch_normalize;
|
||||
l.learning_rate_scale = 1;
|
||||
|
||||
l.weights = calloc(c*n*size*size, sizeof(float));
|
||||
l.weight_updates = calloc(c*n*size*size, sizeof(float));
|
||||
@ -350,6 +351,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
l.out_c = n;
|
||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
l.activation = activation;
|
||||
|
||||
l.output = calloc(total_batch*l.outputs, sizeof(float));
|
||||
l.delta = calloc(total_batch*l.outputs, sizeof(float));
|
||||
@ -417,17 +419,17 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
}
|
||||
|
||||
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
|
||||
#ifdef CUDNN_HALF
|
||||
l.weights_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weights, c*n*size*size / 2);
|
||||
l.weight_updates_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weight_updates, c*n*size*size / 2);
|
||||
#endif
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
|
||||
|
||||
l.biases_gpu = cuda_make_array(l.biases, n);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
|
||||
l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
|
||||
l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
|
||||
|
||||
if(binary){
|
||||
l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
|
||||
@ -439,6 +441,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
}
|
||||
|
||||
if(batch_normalize){
|
||||
l.scales_gpu = cuda_make_array(l.scales, n);
|
||||
l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
|
||||
|
||||
l.mean_gpu = cuda_make_array(l.mean, n);
|
||||
l.variance_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
@ -448,9 +453,6 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
l.mean_delta_gpu = cuda_make_array(l.mean, n);
|
||||
l.variance_delta_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.scales_gpu = cuda_make_array(l.scales, n);
|
||||
l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
|
||||
|
||||
l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
|
||||
l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
|
||||
}
|
||||
@ -463,7 +465,6 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
l.workspace_size = get_workspace_size(l);
|
||||
size_t workspace_size16 = get_workspace_size16(l);
|
||||
if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16;
|
||||
l.activation = activation;
|
||||
|
||||
//fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
|
||||
l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
|
||||
|
@ -26,7 +26,7 @@ static void increment_layer(layer *l, int steps)
|
||||
#endif
|
||||
}
|
||||
|
||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
|
||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize)
|
||||
{
|
||||
fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
|
||||
batch = batch / steps;
|
||||
@ -47,20 +47,20 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
|
||||
l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
|
||||
|
||||
l.input_layer = malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||
fprintf(stderr, "");
|
||||
*(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||
l.input_layer->batch = batch;
|
||||
if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
|
||||
|
||||
l.self_layer = malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||
fprintf(stderr, "");
|
||||
*(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||
l.self_layer->batch = batch;
|
||||
if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
|
||||
|
||||
l.output_layer = malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||
fprintf(stderr, "");
|
||||
*(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||
l.output_layer->batch = batch;
|
||||
if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
|
||||
|
||||
@ -75,8 +75,7 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
|
||||
l.forward_gpu = forward_crnn_layer_gpu;
|
||||
l.backward_gpu = backward_crnn_layer_gpu;
|
||||
l.update_gpu = update_crnn_layer_gpu;
|
||||
|
||||
l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
|
||||
l.state_gpu = cuda_make_array(l.state, batch*l.hidden*(steps + 1));
|
||||
l.output_gpu = l.output_layer->output_gpu;
|
||||
l.delta_gpu = l.output_layer->delta_gpu;
|
||||
#endif
|
||||
@ -263,8 +262,8 @@ void backward_crnn_layer_gpu(layer l, network_state state)
|
||||
increment_layer(&output_layer, l.steps - 1);
|
||||
l.state_gpu += l.hidden*l.batch*l.steps;
|
||||
for (i = l.steps-1; i >= 0; --i) {
|
||||
copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
|
||||
axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
|
||||
//copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN
|
||||
//axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN
|
||||
|
||||
s.input = l.state_gpu;
|
||||
s.delta = self_layer.delta_gpu;
|
||||
@ -272,12 +271,13 @@ void backward_crnn_layer_gpu(layer l, network_state state)
|
||||
|
||||
l.state_gpu -= l.hidden*l.batch;
|
||||
|
||||
copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
|
||||
|
||||
s.input = l.state_gpu;
|
||||
s.delta = self_layer.delta_gpu - l.hidden*l.batch;
|
||||
if (i == 0) s.delta = 0;
|
||||
backward_convolutional_layer_gpu(self_layer, s);
|
||||
|
||||
copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
|
||||
if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
|
||||
s.input = state.input + i*l.inputs*l.batch;
|
||||
if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
|
||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize);
|
||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
|
||||
|
||||
void forward_crnn_layer(layer l, network_state state);
|
||||
void backward_crnn_layer(layer l, network_state state);
|
||||
|
@ -35,6 +35,9 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
|
||||
l.type = LSTM;
|
||||
l.steps = steps;
|
||||
l.inputs = inputs;
|
||||
l.out_w = 1;
|
||||
l.out_h = 1;
|
||||
l.out_c = outputs;
|
||||
|
||||
l.uf = malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
|
13
src/parser.c
13
src/parser.c
@ -182,13 +182,17 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
||||
|
||||
layer parse_crnn(list *options, size_params params)
|
||||
{
|
||||
int output_filters = option_find_int(options, "output_filters",1);
|
||||
int hidden_filters = option_find_int(options, "hidden_filters",1);
|
||||
int size = option_find_int_quiet(options, "size", 3);
|
||||
int stride = option_find_int_quiet(options, "stride", 1);
|
||||
int pad = option_find_int_quiet(options, "pad", 1);
|
||||
|
||||
int output_filters = option_find_int(options, "output",1);
|
||||
int hidden_filters = option_find_int(options, "hidden",1);
|
||||
char *activation_s = option_find_str(options, "activation", "logistic");
|
||||
ACTIVATION activation = get_activation(activation_s);
|
||||
int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
|
||||
|
||||
layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize);
|
||||
layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, size, stride, pad, activation, batch_normalize);
|
||||
|
||||
l.shortcut = option_find_int_quiet(options, "shortcut", 0);
|
||||
|
||||
@ -866,7 +870,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
|
||||
check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
|
||||
}
|
||||
if (workspace_size) {
|
||||
printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000);
|
||||
printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000);
|
||||
net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
|
||||
}
|
||||
else {
|
||||
@ -1167,6 +1171,7 @@ void load_convolutional_weights(layer l, FILE *fp)
|
||||
}
|
||||
int num = l.n*l.c*l.size*l.size;
|
||||
fread(l.biases, sizeof(float), l.n, fp);
|
||||
//fread(l.weights, sizeof(float), num, fp); // as in connected layer
|
||||
if (l.batch_normalize && (!l.dontloadscales)){
|
||||
fread(l.scales, sizeof(float), l.n, fp);
|
||||
fread(l.rolling_mean, sizeof(float), l.n, fp);
|
||||
|
@ -36,6 +36,9 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
|
||||
l.steps = steps;
|
||||
l.hidden = hidden;
|
||||
l.inputs = inputs;
|
||||
l.out_w = 1;
|
||||
l.out_h = 1;
|
||||
l.out_c = outputs;
|
||||
|
||||
l.state = calloc(batch*hidden*(steps+1), sizeof(float));
|
||||
|
||||
@ -264,7 +267,7 @@ void backward_rnn_layer_gpu(layer l, network_state state)
|
||||
|
||||
l.state_gpu -= l.hidden*l.batch;
|
||||
|
||||
copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
|
||||
copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); // the same delta for Input and Self layers
|
||||
|
||||
s.input = l.state_gpu;
|
||||
s.delta = self_layer.delta_gpu - l.hidden*l.batch;
|
||||
|
Reference in New Issue
Block a user