From 9c02df864e32259292e3189a0879b361165eadfb Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Wed, 4 Sep 2019 18:50:56 +0300 Subject: [PATCH] Fixed assisted_excitation and added also for [shortcut] layer --- src/convolutional_kernels.cu | 33 ++++++++++++++++++++++++++------- src/convolutional_layer.c | 9 +++++++-- src/image.c | 14 ++++++++++++++ src/image.h | 1 + src/layer.c | 4 ++++ src/parser.c | 3 ++- src/shortcut_layer.c | 21 +++++++++++++++++---- src/shortcut_layer.h | 2 +- 8 files changed, 72 insertions(+), 15 deletions(-) diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 566fb893..d766c9cf 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -917,13 +917,10 @@ __global__ void calc_avg_activation_kernel(float *src, float *dst, int size, int } } -#include <iostream> - void calc_avg_activation_gpu(float *src, float *dst, int size, int channels, int batches) { const int num_blocks = get_number_of_blocks(size*batches, BLOCK); - std::cout << " size = " << size << ", channels = " << channels << ", batches = " << batches << std::endl; calc_avg_activation_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (src, dst, size, channels, batches); } @@ -937,6 +934,9 @@ __global__ void assisted_activation_kernel(float alpha, float *output, float *gt if (b < batches) { for (int c = 0; c < channels; ++c) { output[xy + size*(c + channels*b)] += alpha * gt_gpu[i] * a_avg_gpu[i]; + //output[xy + size*(c + channels*b)] += gt_gpu[i] * a_avg_gpu[i]; + //output[xy + size*(c + channels*b)] += gt_gpu[i] * output[xy + size*(c + channels*b)]; + //output[xy + size*(c + channels*b)] = a_avg_gpu[i]; } } } @@ -953,12 +953,18 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions); // epoch - const float epoch = (float)(*state.net.seen) / state.net.train_images_num; + //const float epoch = (float)(*state.net.seen) / state.net.train_images_num; // calculate alpha //const
float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches); //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches); - const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2; + //const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2; + float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)); + + if (l.assisted_excitation > 1) { + if (iteration_num > l.assisted_excitation) alpha = 0; + else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation)); + } //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n", // epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num); @@ -969,7 +975,7 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) float *gt = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float)); int b; - int w, h, c; + int w, h; l.max_boxes = state.net.num_boxes; l.truths = l.max_boxes*(4 + 1); @@ -1061,15 +1067,28 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) for (b = 0; b < l.batch; ++b) { + printf(" Assisted Excitation alpha = %f \n", alpha); image img = float_to_image(l.out_w, l.out_h, 1, &gt[l.out_w*l.out_h*b]); char buff[100]; sprintf(buff, "a_excitation_%d", b); show_image_cv(img, buff); - image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]); + //image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]); + image img2 = float_to_image_scaled(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]); char buff2[100]; sprintf(buff2, "a_excitation_act_%d", b); show_image_cv(img2, buff2); + + /* + int c = l.out_c; + if (c > 4) c = 4; + image img3 = float_to_image(l.out_w, l.out_h, c, &l.output[l.out_w*l.out_h*l.out_c*b]); + image dc = collapse_image_layers(img3, 1); + char buff3[100]; + sprintf(buff3,
"a_excitation_act_collapsed_%d", b); + show_image_cv(dc, buff3); + */ + wait_key_cv(5); } wait_until_press_key_cv(); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 157058eb..72bb6025 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -1172,12 +1172,17 @@ void assisted_excitation_forward(convolutional_layer l, network_state state) const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions); // epoch - const float epoch = (float)(*state.net.seen) / state.net.train_images_num; + //const float epoch = (float)(*state.net.seen) / state.net.train_images_num; // calculate alpha //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches); //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches); - const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2; + float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)); + + if (l.assisted_excitation > 1) { + if (iteration_num > l.assisted_excitation) alpha = 0; + else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation)); + } //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n", // epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num); diff --git a/src/image.c b/src/image.c index 2f085801..8befaa2b 100644 --- a/src/image.c +++ b/src/image.c @@ -770,6 +770,20 @@ image make_random_image(int w, int h, int c) return out; } +/* Fill the image returned by make_image(w, h, c) with data divided by the largest absolute value in data. */ +/* abs_max is a float so a peak below 1 is not truncated to 0; when every value is 0 the output is 0 instead of a division by zero. */ +image float_to_image_scaled(int w, int h, int c, float *data) +{ + image out = make_image(w, h, c); + float abs_max = 0; + int i = 0; + for (i = 0; i < w*h*c; ++i) { + if (fabs(data[i]) > abs_max) abs_max = fabs(data[i]); + } + for (i = 0; i < w*h*c; ++i) out.data[i] = (abs_max > 0) ? data[i] / abs_max : 0; + return out; +} + image float_to_image(int w, int h, int c, float *data) { image out = make_empty_image(w,h,c); diff --git a/src/image.h b/src/image.h index
3a1c5b9a..14792c9b 100644 --- a/src/image.h +++ b/src/image.h @@ -79,6 +79,7 @@ void print_image(image m); //LIB_API image make_image(int w, int h, int c); image make_random_image(int w, int h, int c); image make_empty_image(int w, int h, int c); +image float_to_image_scaled(int w, int h, int c, float *data); image float_to_image(int w, int h, int c, float *data); image copy_image(image p); image load_image(char *filename, int w, int h, int c); diff --git a/src/layer.c b/src/layer.c index b6ae95db..e9ae67b5 100644 --- a/src/layer.c +++ b/src/layer.c @@ -157,6 +157,10 @@ void free_layer(layer l) if (l.x_gpu) cuda_free(l.x_gpu); // dont free if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); + // assisted excitation + if (l.gt_gpu) cuda_free(l.gt_gpu); + if (l.a_avg_gpu) cuda_free(l.a_avg_gpu); + if (l.align_bit_weights_gpu) cuda_free((float *)l.align_bit_weights_gpu); if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu); if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu); diff --git a/src/parser.c b/src/parser.c index 97d6aef9..4b56dfc4 100644 --- a/src/parser.c +++ b/src/parser.c @@ -601,6 +601,7 @@ layer parse_batchnorm(list *options, size_params params) layer parse_shortcut(list *options, size_params params, network net) { + int assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0); char *l = option_find(options, "from"); int index = atoi(l); if(index < 0) index = params.index + index; @@ -608,7 +609,7 @@ layer parse_shortcut(list *options, size_params params, network net) int batch = params.batch; layer from = net.layers[index]; - layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, assisted_excitation); char *activation_s = option_find_str(options, "activation", "linear"); ACTIVATION activation = get_activation(activation_s); diff --git a/src/shortcut_layer.c 
b/src/shortcut_layer.c index 1f7c6d35..d056a6a0 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -4,9 +4,10 @@ #include #include -layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation) { - fprintf(stderr,"Shortcut Layer: %d\n", index); + if(assisted_excitation) fprintf(stderr, "Shortcut Layer - AE: %d\n", index); + else fprintf(stderr,"Shortcut Layer: %d\n", index); layer l = { (LAYER_TYPE)0 }; l.type = SHORTCUT; l.batch = batch; @@ -19,6 +20,8 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.outputs = w*h*c; l.inputs = l.outputs; + l.assisted_excitation = assisted_excitation; + if(w != w2 || h != h2 || c != c2) fprintf(stderr, " w = %d, w2 = %d, h = %d, h2 = %d, c = %d, c2 = %d \n", w, w2, h, h2, c, c2); l.index = index; @@ -28,13 +31,19 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.forward = forward_shortcut_layer; l.backward = backward_shortcut_layer; - #ifdef GPU +#ifdef GPU l.forward_gpu = forward_shortcut_layer_gpu; l.backward_gpu = backward_shortcut_layer_gpu; l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); l.output_gpu = cuda_make_array(l.output, l.outputs*batch); - #endif + if (l.assisted_excitation) + { + const int size = l.out_w * l.out_h * l.batch; + l.gt_gpu = cuda_make_array(NULL, size); + l.a_avg_gpu = cuda_make_array(NULL, size); + } +#endif // GPU return l; } @@ -72,6 +81,8 @@ void forward_shortcut_layer(const layer l, network_state state) shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output); } activate_array(l.output, l.outputs*l.batch, l.activation); + + if (l.assisted_excitation && state.train) assisted_excitation_forward(l, state); } void backward_shortcut_layer(const layer l, network_state state) @@ -89,6 +100,8 @@ void 
forward_shortcut_layer_gpu(const layer l, network_state state) //shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); input_shortcut_gpu(state.input, l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); + + if (l.assisted_excitation && state.train) assisted_excitation_forward_gpu(l, state); } void backward_shortcut_layer_gpu(const layer l, network_state state) diff --git a/src/shortcut_layer.h b/src/shortcut_layer.h index b24aa3e6..ad8d45f3 100644 --- a/src/shortcut_layer.h +++ b/src/shortcut_layer.h @@ -7,7 +7,7 @@ #ifdef __cplusplus extern "C" { #endif -layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation); void forward_shortcut_layer(const layer l, network_state state); void backward_shortcut_layer(const layer l, network_state state); void resize_shortcut_layer(layer *l, int w, int h);