From 9c02df864e32259292e3189a0879b361165eadfb Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Wed, 4 Sep 2019 18:50:56 +0300
Subject: [PATCH] Fixed assisted_excitation and added also for [shortcut] layer

---
 src/convolutional_kernels.cu | 33 ++++++++++++++++++++++++++-------
 src/convolutional_layer.c    |  9 +++++++--
 src/image.c                  | 14 ++++++++++++++
 src/image.h                  |  1 +
 src/layer.c                  |  4 ++++
 src/parser.c                 |  3 ++-
 src/shortcut_layer.c         | 21 +++++++++++++++++----
 src/shortcut_layer.h         |  2 +-
 8 files changed, 72 insertions(+), 15 deletions(-)
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 566fb893..d766c9cf 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -917,13 +917,10 @@ __global__ void calc_avg_activation_kernel(float *src, float *dst, int size, int
     }
 }
 
-#include <iostream>
-
 void calc_avg_activation_gpu(float *src, float *dst, int size, int channels, int batches)
 {
     const int num_blocks = get_number_of_blocks(size*batches, BLOCK);
 
-    std::cout << " size = " << size << ",  channels = " << channels << ", batches = " << batches << std::endl;
     calc_avg_activation_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (src, dst, size, channels, batches);
 }
 
@@ -937,6 +934,9 @@ __global__ void assisted_activation_kernel(float alpha, float *output, float *gt
     if (b < batches) {
         for (int c = 0; c < channels; ++c) {
             output[xy + size*(c + channels*b)] += alpha * gt_gpu[i] * a_avg_gpu[i];
+            //output[xy + size*(c + channels*b)] += gt_gpu[i] * a_avg_gpu[i];
+            //output[xy + size*(c + channels*b)] += gt_gpu[i] * output[xy + size*(c + channels*b)];
+            //output[xy + size*(c + channels*b)] = a_avg_gpu[i];
         }
     }
 }
@@ -953,12 +953,18 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
     const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
 
     // epoch
-    const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
+    //const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
 
     // calculate alpha
     //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches);
     //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches);
-    const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2;
+    //const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2;
+    float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches));
+
+    if (l.assisted_excitation > 1) {
+        if (iteration_num > l.assisted_excitation) alpha = 0;
+        else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation));
+    }
 
     //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n",
     //    epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num);
@@ -969,7 +975,7 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
     float *gt = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float));
 
     int b;
-    int w, h, c;
+    int w, h;
 
     l.max_boxes = state.net.num_boxes;
     l.truths = l.max_boxes*(4 + 1);
@@ -1061,15 +1067,28 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
 
         for (b = 0; b < l.batch; ++b)
         {
+            printf(" Assisted Excitation alpha = %f \n", alpha);
             image img = float_to_image(l.out_w, l.out_h, 1, &gt[l.out_w*l.out_h*b]);
             char buff[100];
             sprintf(buff, "a_excitation_%d", b);
             show_image_cv(img, buff);
 
-            image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]);
+            //image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]);
+            image img2 = float_to_image_scaled(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]);
             char buff2[100];
             sprintf(buff2, "a_excitation_act_%d", b);
             show_image_cv(img2, buff2);
+
+            /*
+            int c = l.out_c;
+            if (c > 4) c = 4;
+            image img3 = float_to_image(l.out_w, l.out_h, c, &l.output[l.out_w*l.out_h*l.out_c*b]);
+            image dc = collapse_image_layers(img3, 1);
+            char buff3[100];
+            sprintf(buff3, "a_excitation_act_collapsed_%d", b);
+            show_image_cv(dc, buff3);
+            */
+
             wait_key_cv(5);
         }
         wait_until_press_key_cv();
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 157058eb..72bb6025 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -1172,12 +1172,17 @@ void assisted_excitation_forward(convolutional_layer l, network_state state)
     const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
 
     // epoch
-    const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
+    //const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
 
     // calculate alpha
     //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches);
     //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches);
-    const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2;
+    float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches));
+
+    if (l.assisted_excitation > 1) {
+        if (iteration_num > l.assisted_excitation) alpha = 0;
+        else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation));
+    }
 
     //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n",
     //    epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num);
diff --git a/src/image.c b/src/image.c
index 2f085801..8befaa2b 100644
--- a/src/image.c
+++ b/src/image.c
@@ -770,6 +770,20 @@ image make_random_image(int w, int h, int c)
     return out;
 }
 
+image float_to_image_scaled(int w, int h, int c, float *data)
+{
+    // Returns a new w*h*c image holding data divided by its largest absolute value.
+    image out = make_image(w, h, c);
+    float abs_max = 0.0f;  // must be float: an int accumulator truncates, staying 0 when all |x| < 1
+    int i;
+    for (i = 0; i < w*h*c; ++i) {
+        if (fabs(data[i]) > abs_max) abs_max = fabs(data[i]);
+    }
+    if (abs_max == 0.0f) abs_max = 1.0f;  // all-zero input: keep zeros instead of producing 0/0 = NaN
+    for (i = 0; i < w*h*c; ++i) out.data[i] = data[i] / abs_max;
+    return out;
+}
+
 image float_to_image(int w, int h, int c, float *data)
 {
     image out = make_empty_image(w,h,c);
diff --git a/src/image.h b/src/image.h
index 3a1c5b9a..14792c9b 100644
--- a/src/image.h
+++ b/src/image.h
@@ -79,6 +79,7 @@ void print_image(image m);
 //LIB_API image make_image(int w, int h, int c);
 image make_random_image(int w, int h, int c);
 image make_empty_image(int w, int h, int c);
+image float_to_image_scaled(int w, int h, int c, float *data);
 image float_to_image(int w, int h, int c, float *data);
 image copy_image(image p);
 image load_image(char *filename, int w, int h, int c);
diff --git a/src/layer.c b/src/layer.c
index b6ae95db..e9ae67b5 100644
--- a/src/layer.c
+++ b/src/layer.c
@@ -157,6 +157,10 @@ void free_layer(layer l)
     if (l.x_gpu)                   cuda_free(l.x_gpu);  // dont free
     if (l.x_norm_gpu)              cuda_free(l.x_norm_gpu);
 
+    // assisted excitation
+    if (l.gt_gpu)                  cuda_free(l.gt_gpu);
+    if (l.a_avg_gpu)               cuda_free(l.a_avg_gpu);
+
     if (l.align_bit_weights_gpu)   cuda_free((float *)l.align_bit_weights_gpu);
     if (l.mean_arr_gpu)            cuda_free(l.mean_arr_gpu);
     if (l.align_workspace_gpu)     cuda_free(l.align_workspace_gpu);
diff --git a/src/parser.c b/src/parser.c
index 97d6aef9..4b56dfc4 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -601,6 +601,7 @@ layer parse_batchnorm(list *options, size_params params)
 
 layer parse_shortcut(list *options, size_params params, network net)
 {
+    int assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0);
     char *l = option_find(options, "from");
     int index = atoi(l);
     if(index < 0) index = params.index + index;
@@ -608,7 +609,7 @@ layer parse_shortcut(list *options, size_params params, network net)
     int batch = params.batch;
     layer from = net.layers[index];
 
-    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);
+    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, assisted_excitation);
 
     char *activation_s = option_find_str(options, "activation", "linear");
     ACTIVATION activation = get_activation(activation_s);
diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c
index 1f7c6d35..d056a6a0 100644
--- a/src/shortcut_layer.c
+++ b/src/shortcut_layer.c
@@ -4,9 +4,10 @@
 #include <stdio.h>
 #include <assert.h>
 
-layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2)
+layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation)
 {
-    fprintf(stderr,"Shortcut Layer: %d\n", index);
+    if(assisted_excitation) fprintf(stderr, "Shortcut Layer - AE: %d\n", index);
+    else fprintf(stderr,"Shortcut Layer: %d\n", index);
     layer l = { (LAYER_TYPE)0 };
     l.type = SHORTCUT;
     l.batch = batch;
@@ -19,6 +20,8 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
     l.outputs = w*h*c;
     l.inputs = l.outputs;
 
+    l.assisted_excitation = assisted_excitation;
+
     if(w != w2 || h != h2 || c != c2) fprintf(stderr, " w = %d, w2 = %d, h = %d, h2 = %d, c = %d, c2 = %d \n", w, w2, h, h2, c, c2);
 
     l.index = index;
@@ -28,13 +31,19 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
 
     l.forward = forward_shortcut_layer;
     l.backward = backward_shortcut_layer;
-    #ifdef GPU
+#ifdef GPU
     l.forward_gpu = forward_shortcut_layer_gpu;
     l.backward_gpu = backward_shortcut_layer_gpu;
 
     l.delta_gpu =  cuda_make_array(l.delta, l.outputs*batch);
     l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
-    #endif
+    if (l.assisted_excitation)
+    {
+        const int size = l.out_w * l.out_h * l.batch;
+        l.gt_gpu = cuda_make_array(NULL, size);
+        l.a_avg_gpu = cuda_make_array(NULL, size);
+    }
+#endif  // GPU
     return l;
 }
 
@@ -72,6 +81,8 @@ void forward_shortcut_layer(const layer l, network_state state)
         shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output);
     }
     activate_array(l.output, l.outputs*l.batch, l.activation);
+
+    if (l.assisted_excitation && state.train) assisted_excitation_forward(l, state);
 }
 
 void backward_shortcut_layer(const layer l, network_state state)
@@ -89,6 +100,8 @@ void forward_shortcut_layer_gpu(const layer l, network_state state)
     //shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu);
     input_shortcut_gpu(state.input, l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu);
     activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
+
+    if (l.assisted_excitation && state.train) assisted_excitation_forward_gpu(l, state);
 }
 
 void backward_shortcut_layer_gpu(const layer l, network_state state)
diff --git a/src/shortcut_layer.h b/src/shortcut_layer.h
index b24aa3e6..ad8d45f3 100644
--- a/src/shortcut_layer.h
+++ b/src/shortcut_layer.h
@@ -7,7 +7,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2);
+layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation);
 void forward_shortcut_layer(const layer l, network_state state);
 void backward_shortcut_layer(const layer l, network_state state);
 void resize_shortcut_layer(layer *l, int w, int h);