darknet/src/normalization_layer.c

#include "normalization_layer.h"
#include "blas.h"
#include <stdio.h>

/*
 * Construct a local response normalization layer.
 *
 * batch              - number of samples per batch
 * w, h, c            - input width, height and channel count; the output
 *                      dimensions are set to the same values
 * size               - stored in the layer's `size` field
 * alpha, beta, kappa - stored in the layer's fields of the same names
 *
 * Returns the layer by value. The output, delta, squared and norms buffers
 * each hold h*w*c*batch floats and are zero-filled by calloc. When GPU is
 * defined, an array of the same element count is requested from
 * cuda_make_array for each of them.
 *
 * NOTE(review): the calloc results are not checked for NULL here.
 */
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
{
    /* Element count shared by every per-layer buffer. */
    const int total = h * w * c * batch;
    layer l = {0};

    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);

    l.type = NORMALIZATION;
    l.batch = batch;

    /* Normalization does not change the shape: out dims mirror in dims. */
    l.out_h = h;
    l.h = h;
    l.out_w = w;
    l.w = w;
    l.out_c = c;
    l.c = c;

    l.size = size;
    l.alpha = alpha;
    l.beta = beta;
    l.kappa = kappa;

    l.inputs = w*h*c;
    l.outputs = l.inputs;

    l.output = calloc(total, sizeof(float));
    l.delta = calloc(total, sizeof(float));
    l.squared = calloc(total, sizeof(float));
    l.norms = calloc(total, sizeof(float));
#ifdef GPU
    l.output_gpu = cuda_make_array(0, total);
    l.delta_gpu = cuda_make_array(0, total);
    l.squared_gpu = cuda_make_array(0, total);
    l.norms_gpu = cuda_make_array(0, total);
#endif
    return l;
}

/*
 * Resize an existing normalization layer to a new spatial size.
 *
 * layer - layer to update in place; its channel count and batch size are kept
 * w, h  - new width and height (used for both input and output dimensions)
 *
 * The output, delta, squared and norms buffers are realloc'ed to hold
 * h*w*c*batch floats. When GPU is defined, the four device arrays are freed
 * with cuda_free and requested again from cuda_make_array at the new size.
 *
 * NOTE(review): each realloc result is assigned straight back to the field it
 * came from and is never checked, so a failed realloc would leave the field
 * NULL and lose the old pointer.
 */
void resize_normalization_layer(layer *layer, int w, int h)
{
    const int channels = layer->c;
    const int samples = layer->batch;
    const int total = h * w * channels * samples;   /* floats per buffer */
    const size_t bytes = total * sizeof(float);

    layer->h = h;
    layer->out_h = h;
    layer->w = w;
    layer->out_w = w;

    layer->inputs = w*h*channels;
    layer->outputs = layer->inputs;

    layer->output = realloc(layer->output, bytes);
    layer->delta = realloc(layer->delta, bytes);
    layer->squared = realloc(layer->squared, bytes);
    layer->norms = realloc(layer->norms, bytes);
#ifdef GPU
    /* Release all old device arrays first, then allocate the new ones. */
    cuda_free(layer->output_gpu);
    cuda_free(layer->delta_gpu);
    cuda_free(layer->squared_gpu);
    cuda_free(layer->norms_gpu);
    layer->output_gpu = cuda_make_array(0, total);
    layer->delta_gpu = cuda_make_array(0, total);
    layer->squared_gpu = cuda_make_array(0, total);
    layer->norms_gpu = cuda_make_array(0, total);
#endif
}

/*
 * Forward pass of cross-channel local response normalization (CPU).
 *
 * layer - the normalization layer; reads w, h, c, batch, size, alpha, beta,
 *         kappa and writes the squared, norms and output buffers
 * state - supplies state.input, read as batch consecutive blocks of w*h*c floats
 *
 * Assuming the blas.h helpers behave as their names suggest (pow_cpu:
 * element-wise power, const_cpu: fill, axpy_cpu: y += a*x, copy_cpu: copy,
 * mul_cpu: y *= x -- confirm against blas.c), the result per sample is
 *
 *     norms[k]  = kappa + alpha * sum_j input[j]^2
 *                 for j in [k - (size-1)/2, k + size/2] clipped to [0, c-1]
 *     output[k] = input[k] * norms[k]^(-beta)
 *
 * evaluated independently at every spatial position. norms is built with a
 * sliding window over channels: channel k starts from channel k-1's sum, drops
 * the channel that left the window and adds the one that entered it.
 */
void forward_normalization_layer(const layer layer, network_state state)
{
    int k,b;
    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);

    for(b = 0; b < layer.batch; ++b){
        float *squared = layer.squared + w*h*c*b;
        float *norms   = layer.norms + w*h*c*b;
        float *input   = state.input + w*h*c*b;
        pow_cpu(w*h*c, 2, input, 1, squared, 1);

        /* Seed channel 0 with its whole window, channels 0..size/2.
         * Channel size/2 has to be included here: the sliding update below
         * only ever adds channels >= size/2 + 1, yet it subtracts channel
         * size/2 once `prev` reaches it. The `k < c` bound keeps the reads
         * inside this sample's c channels when the window is wider than c. */
        const_cpu(w*h, layer.kappa, norms, 1);
        for(k = 0; k <= layer.size/2 && k < c; ++k){
            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
        }

        for(k = 1; k < layer.c; ++k){
            /* Start from the previous channel's window sum. */
            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
            int prev = k - ((layer.size-1)/2) - 1;   /* channel leaving the window */
            int next = k + (layer.size/2);           /* channel entering the window */
            if(prev >= 0){
                axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
            }
            if(next < layer.c){
                axpy_cpu(w*h,  layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
            }
        }
    }
    /* output = norms^(-beta), then scaled element-wise by the input. */
    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
    mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
}

/*
 * Backward pass of the normalization layer (CPU).
 *
 * layer - the normalization layer; reads norms, delta, beta and dimensions
 * state - state.delta receives the result for all w*h*c*batch elements
 *
 * state.delta is first filled from layer.norms via pow_cpu with exponent
 * -beta, then combined with layer.delta via mul_cpu.
 */
void backward_normalization_layer(const layer layer, network_state state)
{
    // TODO This is approximate ;-)
    // Also this should add in to delta instead of overwriting.

    const int total = layer.w * layer.h * layer.c * layer.batch;

    pow_cpu(total, -layer.beta, layer.norms, 1, state.delta, 1);
    mul_cpu(total, layer.delta, 1, state.delta, 1);
}

#ifdef GPU
/*
 * Forward pass of cross-channel local response normalization, using the
 * *_ongpu helpers and the layer's *_gpu buffers.
 *
 * layer - the normalization layer; reads w, h, c, batch, size, alpha, beta,
 *         kappa and writes squared_gpu, norms_gpu and output_gpu
 * state - supplies state.input, read as batch consecutive blocks of w*h*c
 *         floats (presumably a device pointer here -- confirm with the caller)
 *
 * Assuming the *_ongpu helpers behave as their names suggest (pow: element-wise
 * power, const: fill, axpy: y += a*x, copy: copy, mul: y *= x -- confirm
 * against the blas kernels), the result per sample is
 *
 *     norms[k]  = kappa + alpha * sum_j input[j]^2
 *                 for j in [k - (size-1)/2, k + size/2] clipped to [0, c-1]
 *     output[k] = input[k] * norms[k]^(-beta)
 *
 * evaluated independently at every spatial position. norms is built with a
 * sliding window over channels: channel k starts from channel k-1's sum, drops
 * the channel that left the window and adds the one that entered it.
 */
void forward_normalization_layer_gpu(const layer layer, network_state state)
{
    int k,b;
    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);

    for(b = 0; b < layer.batch; ++b){
        float *squared = layer.squared_gpu + w*h*c*b;
        float *norms   = layer.norms_gpu + w*h*c*b;
        float *input   = state.input + w*h*c*b;
        pow_ongpu(w*h*c, 2, input, 1, squared, 1);

        /* Seed channel 0 with its whole window, channels 0..size/2.
         * Channel size/2 has to be included here: the sliding update below
         * only ever adds channels >= size/2 + 1, yet it subtracts channel
         * size/2 once `prev` reaches it. The `k < c` bound keeps the reads
         * inside this sample's c channels when the window is wider than c. */
        const_ongpu(w*h, layer.kappa, norms, 1);
        for(k = 0; k <= layer.size/2 && k < c; ++k){
            axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
        }

        for(k = 1; k < layer.c; ++k){
            /* Start from the previous channel's window sum. */
            copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
            int prev = k - ((layer.size-1)/2) - 1;   /* channel leaving the window */
            int next = k + (layer.size/2);           /* channel entering the window */
            if(prev >= 0){
                axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
            }
            if(next < layer.c){
                axpy_ongpu(w*h,  layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
            }
        }
    }
    /* output = norms^(-beta), then scaled element-wise by the input. */
    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
    mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
}

/*
 * Backward pass of the normalization layer, using the *_ongpu helpers.
 *
 * layer - the normalization layer; reads norms_gpu, delta_gpu, beta and
 *         dimensions
 * state - state.delta receives the result for all w*h*c*batch elements
 *
 * state.delta is first filled from layer.norms_gpu via pow_ongpu with exponent
 * -beta, then combined with layer.delta_gpu via mul_ongpu.
 */
void backward_normalization_layer_gpu(const layer layer, network_state state)
{
    // TODO This is approximate ;-)

    const int total = layer.w * layer.h * layer.c * layer.batch;

    pow_ongpu(total, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
    mul_ongpu(total, layer.delta_gpu, 1, state.delta, 1);
}
#endif
normalization layer 2015-07-10 01:22:14 +03:00			`#include "normalization_layer.h"`
			`#include "blas.h"`
			`#include <stdio.h>`

			`layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)`
			`{`
			`fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);`
			`layer layer = {0};`
			`layer.type = NORMALIZATION;`
			`layer.batch = batch;`
			`layer.h = layer.out_h = h;`
			`layer.w = layer.out_w = w;`
			`layer.c = layer.out_c = c;`
			`layer.kappa = kappa;`
			`layer.size = size;`
			`layer.alpha = alpha;`
			`layer.beta = beta;`
			`layer.output = calloc(h * w * c * batch, sizeof(float));`
			`layer.delta = calloc(h * w * c * batch, sizeof(float));`
			`layer.squared = calloc(h * w * c * batch, sizeof(float));`
			`layer.norms = calloc(h * w * c * batch, sizeof(float));`
			`layer.inputs = whc;`
			`layer.outputs = layer.inputs;`
			`#ifdef GPU`
			`layer.output_gpu = cuda_make_array(0, h * w * c * batch);`
			`layer.delta_gpu = cuda_make_array(0, h * w * c * batch);`
			`layer.squared_gpu = cuda_make_array(0, h * w * c * batch);`
			`layer.norms_gpu = cuda_make_array(0, h * w * c * batch);`
			`#endif`
			`return layer;`
			`}`

			`void resize_normalization_layer(layer *layer, int w, int h)`
			`{`
			`int c = layer->c;`
			`int batch = layer->batch;`
			`layer->h = h;`
			`layer->w = w;`
			`layer->out_h = h;`
			`layer->out_w = w;`
			`layer->inputs = whc;`
			`layer->outputs = layer->inputs;`
add avgpool layer 2015-07-14 01:04:21 +03:00			`layer->output = realloc(layer->output, h * w * c * batch * sizeof(float));`
			`layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float));`
			`layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float));`
			`layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float));`
normalization layer 2015-07-10 01:22:14 +03:00			`#ifdef GPU`
			`cuda_free(layer->output_gpu);`
			`cuda_free(layer->delta_gpu);`
			`cuda_free(layer->squared_gpu);`
			`cuda_free(layer->norms_gpu);`
			`layer->output_gpu = cuda_make_array(0, h * w * c * batch);`
			`layer->delta_gpu = cuda_make_array(0, h * w * c * batch);`
			`layer->squared_gpu = cuda_make_array(0, h * w * c * batch);`
			`layer->norms_gpu = cuda_make_array(0, h * w * c * batch);`
			`#endif`
			`}`

			`void forward_normalization_layer(const layer layer, network_state state)`
			`{`
			`int k,b;`
			`int w = layer.w;`
			`int h = layer.h;`
			`int c = layer.c;`
			`scal_cpu(whc*layer.batch, 0, layer.squared, 1);`

			`for(b = 0; b < layer.batch; ++b){`
			`float squared = layer.squared + whcb;`
			`float norms = layer.norms + whcb;`
			`float input = state.input + whcb;`
			`pow_cpu(whc, 2, input, 1, squared, 1);`

			`const_cpu(w*h, layer.kappa, norms, 1);`
			`for(k = 0; k < layer.size/2; ++k){`
			`axpy_cpu(wh, layer.alpha, squared + wh*k, 1, norms, 1);`
			`}`

			`for(k = 1; k < layer.c; ++k){`
			`copy_cpu(wh, norms + wh(k-1), 1, norms + wh*k, 1);`
			`int prev = k - ((layer.size-1)/2) - 1;`
			`int next = k + (layer.size/2);`
			`if(prev >= 0) axpy_cpu(wh, -layer.alpha, squared + whprev, 1, norms + wh*k, 1);`
			`if(next < layer.c) axpy_cpu(wh, layer.alpha, squared + whnext, 1, norms + wh*k, 1);`
			`}`
			`}`
			`pow_cpu(whc*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);`
			`mul_cpu(whc*layer.batch, state.input, 1, layer.output, 1);`
			`}`

			`void backward_normalization_layer(const layer layer, network_state state)`
			`{`
			`// TODO This is approximate ;-)`
Changes to make routing work better 2015-07-22 02:09:33 +03:00			`// Also this should add in to delta instead of overwritting.`
normalization layer 2015-07-10 01:22:14 +03:00
			`int w = layer.w;`
			`int h = layer.h;`
			`int c = layer.c;`
			`pow_cpu(whc*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);`
			`mul_cpu(whc*layer.batch, layer.delta, 1, state.delta, 1);`
			`}`

			`#ifdef GPU`
			`void forward_normalization_layer_gpu(const layer layer, network_state state)`
			`{`
			`int k,b;`
			`int w = layer.w;`
			`int h = layer.h;`
			`int c = layer.c;`
			`scal_ongpu(whc*layer.batch, 0, layer.squared_gpu, 1);`

			`for(b = 0; b < layer.batch; ++b){`
			`float squared = layer.squared_gpu + whcb;`
			`float norms = layer.norms_gpu + whcb;`
			`float input = state.input + whcb;`
			`pow_ongpu(whc, 2, input, 1, squared, 1);`

			`const_ongpu(w*h, layer.kappa, norms, 1);`
			`for(k = 0; k < layer.size/2; ++k){`
			`axpy_ongpu(wh, layer.alpha, squared + wh*k, 1, norms, 1);`
			`}`

			`for(k = 1; k < layer.c; ++k){`
			`copy_ongpu(wh, norms + wh(k-1), 1, norms + wh*k, 1);`
			`int prev = k - ((layer.size-1)/2) - 1;`
			`int next = k + (layer.size/2);`
			`if(prev >= 0) axpy_ongpu(wh, -layer.alpha, squared + whprev, 1, norms + wh*k, 1);`
			`if(next < layer.c) axpy_ongpu(wh, layer.alpha, squared + whnext, 1, norms + wh*k, 1);`
			`}`
			`}`
			`pow_ongpu(whc*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);`
			`mul_ongpu(whc*layer.batch, state.input, 1, layer.output_gpu, 1);`
			`}`

			`void backward_normalization_layer_gpu(const layer layer, network_state state)`
			`{`
			`// TODO This is approximate ;-)`

			`int w = layer.w;`
			`int h = layer.h;`
			`int c = layer.c;`
			`pow_ongpu(whc*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);`
			`mul_ongpu(whc*layer.batch, layer.delta_gpu, 1, state.delta, 1);`
			`}`
			`#endif`