mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Added the antialiasing=1 parameter for the [convolutional] layer on GPU
This commit is contained in:
@ -211,6 +211,7 @@ struct layer {
|
||||
int stride_x;
|
||||
int stride_y;
|
||||
int dilation;
|
||||
int antialiasing;
|
||||
int maxpool_depth;
|
||||
int out_channels;
|
||||
int reverse;
|
||||
@ -528,6 +529,7 @@ struct layer {
|
||||
float * scale_updates_gpu;
|
||||
float * scale_change_gpu;
|
||||
|
||||
float * input_antialiasing_gpu;
|
||||
float * output_gpu;
|
||||
float * output_sigmoid_gpu;
|
||||
float * loss_gpu;
|
||||
|
@ -66,44 +66,44 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
|
||||
|
||||
// U
|
||||
l.uf = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.uf->batch = batch;
|
||||
if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
|
||||
|
||||
l.ui = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.ui->batch = batch;
|
||||
if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
|
||||
|
||||
l.ug = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.ug->batch = batch;
|
||||
if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
|
||||
|
||||
l.uo = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.uo->batch = batch;
|
||||
if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
|
||||
|
||||
|
||||
// W
|
||||
l.wf = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.wf->batch = batch;
|
||||
if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
|
||||
|
||||
l.wi = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.wi->batch = batch;
|
||||
if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
|
||||
|
||||
l.wg = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.wg->batch = batch;
|
||||
if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
|
||||
|
||||
l.wo = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.wo->batch = batch;
|
||||
if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size;
|
||||
|
||||
@ -111,21 +111,21 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
|
||||
// V
|
||||
l.vf = (layer*)calloc(1, sizeof(layer));
|
||||
if (l.peephole) {
|
||||
*(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.vf->batch = batch;
|
||||
if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size;
|
||||
}
|
||||
|
||||
l.vi = (layer*)calloc(1, sizeof(layer));
|
||||
if (l.peephole) {
|
||||
*(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.vi->batch = batch;
|
||||
if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size;
|
||||
}
|
||||
|
||||
l.vo = (layer*)calloc(1, sizeof(layer));
|
||||
if (l.peephole) {
|
||||
*(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.vo->batch = batch;
|
||||
if (l.workspace_size < l.vo->workspace_size) l.workspace_size = l.vo->workspace_size;
|
||||
}
|
||||
|
@ -604,10 +604,34 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
||||
if (state.net.try_fix_nan) {
|
||||
fix_nan_and_inf(l.output_gpu, l.outputs*l.batch);
|
||||
}
|
||||
|
||||
if (l.antialiasing) {
|
||||
network_state s = { 0 };
|
||||
s.train = state.train;
|
||||
s.workspace = state.workspace;
|
||||
s.net = state.net;
|
||||
if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() )
|
||||
s.input = l.output_gpu;
|
||||
forward_convolutional_layer_gpu(*(l.input_layer), s);
|
||||
simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.input_antialiasing_gpu);
|
||||
simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.input_layer->output_gpu, l.output_gpu);
|
||||
}
|
||||
}
|
||||
|
||||
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
||||
{
|
||||
if (l.antialiasing) {
|
||||
network_state s = { 0 };
|
||||
s.train = state.train;
|
||||
s.workspace = state.workspace;
|
||||
s.net = state.net;
|
||||
s.delta = l.delta_gpu;
|
||||
s.input = l.input_antialiasing_gpu;
|
||||
//if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() )
|
||||
simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.delta_gpu, l.input_layer->delta_gpu);
|
||||
backward_convolutional_layer_gpu(*(l.input_layer), s);
|
||||
}
|
||||
|
||||
if(state.net.try_fix_nan) constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
|
||||
|
||||
if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.output_sigmoid_gpu, l.delta_gpu);
|
||||
|
@ -332,7 +332,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer)
|
||||
convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer)
|
||||
{
|
||||
int total_batch = batch*steps;
|
||||
int i;
|
||||
@ -342,6 +342,13 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
if (xnor) groups = 1; // disable groups for XNOR-net
|
||||
if (groups < 1) groups = 1;
|
||||
|
||||
const int blur_stride_x = stride_x;
|
||||
const int blur_stride_y = stride_y;
|
||||
l.antialiasing = antialiasing;
|
||||
if (antialiasing) {
|
||||
stride_x = stride_y = l.stride = l.stride_x = l.stride_y = 1; // use stride=1 in host-layer
|
||||
}
|
||||
|
||||
l.share_layer = share_layer;
|
||||
l.index = index;
|
||||
l.h = h;
|
||||
@ -568,6 +575,47 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
|
||||
//fprintf(stderr, "%5d/%2d %2d x%2d /%2d(%d)%4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, groups, size, size, stride, dilation, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
|
||||
|
||||
if (l.antialiasing) {
|
||||
printf("AA: ");
|
||||
l.input_layer = (layer*)calloc(1, sizeof(layer));
|
||||
const int blur_size = 3;
|
||||
*(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL);
|
||||
const int blur_nweights = n * blur_size * blur_size; // (n / n) * n * blur_size * blur_size;
|
||||
int i;
|
||||
for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) {
|
||||
/*
|
||||
l.input_layer->weights[i + 0] = 0;
|
||||
l.input_layer->weights[i + 1] = 0;
|
||||
l.input_layer->weights[i + 2] = 0;
|
||||
|
||||
l.input_layer->weights[i + 3] = 0;
|
||||
l.input_layer->weights[i + 4] = 1;
|
||||
l.input_layer->weights[i + 5] = 0;
|
||||
|
||||
l.input_layer->weights[i + 6] = 0;
|
||||
l.input_layer->weights[i + 7] = 0;
|
||||
l.input_layer->weights[i + 8] = 0;
|
||||
*/
|
||||
l.input_layer->weights[i + 0] = 1 / 16.f;
|
||||
l.input_layer->weights[i + 1] = 2 / 16.f;
|
||||
l.input_layer->weights[i + 2] = 1 / 16.f;
|
||||
|
||||
l.input_layer->weights[i + 3] = 2 / 16.f;
|
||||
l.input_layer->weights[i + 4] = 4 / 16.f;
|
||||
l.input_layer->weights[i + 5] = 2 / 16.f;
|
||||
|
||||
l.input_layer->weights[i + 6] = 1 / 16.f;
|
||||
l.input_layer->weights[i + 7] = 2 / 16.f;
|
||||
l.input_layer->weights[i + 8] = 1 / 16.f;
|
||||
|
||||
}
|
||||
for (i = 0; i < n; ++i) l.input_layer->biases[i] = 0;
|
||||
#ifdef GPU
|
||||
l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs);
|
||||
push_convolutional_layer(*(l.input_layer));
|
||||
#endif // GPU
|
||||
}
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
@ -588,7 +636,7 @@ void denormalize_convolutional_layer(convolutional_layer l)
|
||||
|
||||
void test_convolutional_layer()
|
||||
{
|
||||
convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, NULL);
|
||||
convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, 0, NULL);
|
||||
l.batch_normalize = 1;
|
||||
float data[] = {1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
|
@ -30,7 +30,7 @@ void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
|
||||
#endif
|
||||
|
||||
size_t get_convolutional_workspace_size(layer l);
|
||||
convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer);
|
||||
convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer);
|
||||
void denormalize_convolutional_layer(convolutional_layer l);
|
||||
void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
|
||||
void forward_convolutional_layer(const convolutional_layer layer, network_state state);
|
||||
|
@ -50,17 +50,17 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
|
||||
l.state = (float*)calloc(l.hidden * l.batch * (l.steps + 1), sizeof(float));
|
||||
|
||||
l.input_layer = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.input_layer->batch = batch;
|
||||
if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
|
||||
|
||||
l.self_layer = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.self_layer->batch = batch;
|
||||
if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
|
||||
|
||||
l.output_layer = (layer*)calloc(1, sizeof(layer));
|
||||
*(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
|
||||
*(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL);
|
||||
l.output_layer->batch = batch;
|
||||
if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
|
||||
|
||||
|
@ -13,6 +13,9 @@ void free_sublayer(layer *l)
|
||||
void free_layer(layer l)
|
||||
{
|
||||
if (l.share_layer != NULL) return; // don't free shared layers
|
||||
if (l.antialiasing) {
|
||||
free_sublayer(l.input_layer);
|
||||
}
|
||||
if (l.type == CONV_LSTM) {
|
||||
if (l.peephole) {
|
||||
free_sublayer(l.vf);
|
||||
@ -167,6 +170,7 @@ void free_layer(layer l)
|
||||
if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu), l.bias_updates_gpu = NULL;
|
||||
if (l.scales_gpu) cuda_free(l.scales_gpu), l.scales_gpu = NULL;
|
||||
if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu), l.scale_updates_gpu = NULL;
|
||||
if (l.input_antialiasing_gpu) cuda_free(l.input_antialiasing_gpu), l.input_antialiasing_gpu = NULL;
|
||||
if (l.output_gpu) cuda_free(l.output_gpu), l.output_gpu = NULL;
|
||||
if (l.output_sigmoid_gpu) cuda_free(l.output_sigmoid_gpu), l.output_sigmoid_gpu = NULL;
|
||||
if (l.delta_gpu) cuda_free(l.delta_gpu), l.delta_gpu = NULL;
|
||||
|
11
src/parser.c
11
src/parser.c
@ -161,6 +161,7 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo
|
||||
int stride_x = option_find_int_quiet(options, "stride_x", stride);
|
||||
int stride_y = option_find_int_quiet(options, "stride_y", stride);
|
||||
int dilation = option_find_int_quiet(options, "dilation", 1);
|
||||
int antialiasing = option_find_int_quiet(options, "antialiasing", 0);
|
||||
if (size == 1) dilation = 1;
|
||||
int pad = option_find_int_quiet(options, "pad",0);
|
||||
int padding = option_find_int_quiet(options, "padding",0);
|
||||
@ -185,7 +186,7 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo
|
||||
int xnor = option_find_int_quiet(options, "xnor", 0);
|
||||
int use_bin_output = option_find_int_quiet(options, "bin_output", 0);
|
||||
|
||||
convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, share_layer);
|
||||
convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer);
|
||||
layer.flipped = option_find_int_quiet(options, "flipped", 0);
|
||||
layer.dot = option_find_float_quiet(options, "dot", 0);
|
||||
layer.assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0);
|
||||
@ -991,11 +992,19 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
|
||||
n = n->next;
|
||||
++count;
|
||||
if(n){
|
||||
if (l.antialiasing) {
|
||||
params.h = l.input_layer->out_h;
|
||||
params.w = l.input_layer->out_w;
|
||||
params.c = l.input_layer->out_c;
|
||||
params.inputs = l.input_layer->outputs;
|
||||
}
|
||||
else {
|
||||
params.h = l.out_h;
|
||||
params.w = l.out_w;
|
||||
params.c = l.out_c;
|
||||
params.inputs = l.outputs;
|
||||
}
|
||||
}
|
||||
if (l.bflops > 0) bflops += l.bflops;
|
||||
}
|
||||
free_list(sections);
|
||||
|
Reference in New Issue
Block a user