#include "shortcut_layer.h" #include "cuda.h" #include "blas.h" #include "activations.h" #include #include layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) { fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); layer l = {0}; l.type = SHORTCUT; l.batch = batch; l.w = w2; l.h = h2; l.c = c2; l.out_w = w; l.out_h = h; l.out_c = c; l.outputs = w*h*c; l.inputs = l.outputs; l.index = index; l.delta = calloc(l.outputs*batch, sizeof(float)); l.output = calloc(l.outputs*batch, sizeof(float));; l.forward = forward_shortcut_layer; l.backward = backward_shortcut_layer; #ifdef GPU l.forward_gpu = forward_shortcut_layer_gpu; l.backward_gpu = backward_shortcut_layer_gpu; l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); l.output_gpu = cuda_make_array(l.output, l.outputs*batch); #endif return l; } void resize_shortcut_layer(layer *l, int w, int h) { assert(l->w == l->out_w); assert(l->h == l->out_h); l->w = l->out_w = w; l->h = l->out_h = h; l->outputs = w*h*l->out_c; l->inputs = l->outputs; l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); #ifdef GPU cuda_free(l->output_gpu); cuda_free(l->delta_gpu); l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); #endif } void forward_shortcut_layer(const layer l, network net) { copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); activate_array(l.output, l.outputs*l.batch, l.activation); } void backward_shortcut_layer(const layer l, network net) { gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); } #ifdef GPU void forward_shortcut_layer_gpu(const layer l, network net) { copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); } void backward_shortcut_layer_gpu(const layer l, network net) { gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta_gpu); } #endif