2015-11-16 06:51:26 +03:00
|
|
|
#include "cuda_runtime.h"
|
|
|
|
#include "curand.h"
|
|
|
|
#include "cublas_v2.h"
|
|
|
|
|
2015-07-14 01:04:21 +03:00
|
|
|
extern "C" {
|
2017-06-02 06:31:13 +03:00
|
|
|
#include "avgpool_layer.h"
|
|
|
|
#include "cuda.h"
|
2015-07-14 01:04:21 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Forward pass of global average pooling.
 *
 * One thread handles one (batch item, channel) pair: it averages the w*h
 * consecutive input values belonging to that pair and stores the mean in
 * output.
 *
 * Launch layout: the flat thread id is built from threadIdx.x, blockDim.x,
 * blockIdx.x, blockIdx.y and gridDim.x, so blocks are expected to be 1-D
 * while the grid may use x and y. Threads whose id is >= n do nothing.
 *
 * n      - number of (batch item, channel) pairs to process
 * w, h   - each pair owns w*h consecutive input values
 * c      - number of channels per batch item (id is split as id % c, id / c)
 * input  - values for pair (b, k) start at offset w*h*(k + b*c)
 * output - receives one float per pair at index k + c*b; overwritten
 */
__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= n) return;

    int k = id % c;     // channel within the batch item
    int b = id / c;     // batch item
    int out_index = k + c*b;

    const int spatial = w*h;
    // Widen before multiplying so the plane offset cannot wrap in 32-bit int.
    const float *plane = input + (size_t)spatial * out_index;

    // Accumulate in a register and store once, instead of issuing a global
    // read-modify-write on output[] for every input element.
    float sum = 0;
    for(int i = 0; i < spatial; ++i){
        sum += plane[i];
    }
    output[out_index] = sum / spatial;
}
|
|
|
|
|
|
|
|
/*
 * Backward pass of global average pooling.
 *
 * One thread handles one (batch item, channel) pair: it reads that pair's
 * single value from out_delta, divides it by w*h, and adds the result to each
 * of the pair's w*h consecutive entries in in_delta.
 *
 * Launch layout: the flat thread id is built from threadIdx.x, blockDim.x,
 * blockIdx.x, blockIdx.y and gridDim.x, so blocks are expected to be 1-D
 * while the grid may use x and y. Threads whose id is >= n do nothing.
 *
 * n         - number of (batch item, channel) pairs to process
 * w, h      - each pair owns w*h consecutive entries of in_delta
 * c         - number of channels per batch item (id is split as id % c, id / c)
 * in_delta  - accumulated into (+=); entries for pair (b, k) start at offset
 *             w*h*(k + b*c)
 * out_delta - read only; one float per pair at index k + c*b
 *
 * NOTE(review): the per-pair share is read once before the loop, which
 * assumes in_delta and out_delta do not overlap - confirm against callers.
 */
__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= n) return;

    int k = id % c;     // channel within the batch item
    int b = id / c;     // batch item
    int out_index = k + c*b;

    const int spatial = w*h;
    // Loop-invariant: every entry of the plane receives the same share, so
    // load and divide once rather than on every iteration.
    const float share = out_delta[out_index] / spatial;
    // Widen before multiplying so the plane offset cannot wrap in 32-bit int.
    float *plane = in_delta + (size_t)spatial * out_index;

    for(int i = 0; i < spatial; ++i){
        plane[i] += share;
    }
}
|
|
|
|
|
2017-04-10 05:56:42 +03:00
|
|
|
/*
 * Host-side launcher for the forward average-pooling kernel.
 *
 * Requests layer.c * layer.batch threads, reads from net.input_gpu and writes
 * to layer.output_gpu, then passes the result of cudaPeekAtLastError() to
 * check_error().
 */
extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net)
{
    /* Total number of kernel work items requested for this launch. */
    const size_t total = layer.c*layer.batch;

    forward_avgpool_layer_kernel<<<cuda_gridsize(total), BLOCK>>>(
        total,
        layer.w,
        layer.h,
        layer.c,
        net.input_gpu,
        layer.output_gpu);

    check_error(cudaPeekAtLastError());
}
|
|
|
|
|
2017-04-10 05:56:42 +03:00
|
|
|
/*
 * Host-side launcher for the backward average-pooling kernel.
 *
 * Requests layer.c * layer.batch threads; the kernel accumulates into
 * net.delta_gpu using the values in layer.delta_gpu. The result of
 * cudaPeekAtLastError() is then passed to check_error().
 */
extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net)
{
    /* Total number of kernel work items requested for this launch. */
    const size_t total = layer.c*layer.batch;

    backward_avgpool_layer_kernel<<<cuda_gridsize(total), BLOCK>>>(
        total,
        layer.w,
        layer.h,
        layer.c,
        net.delta_gpu,
        layer.delta_gpu);

    check_error(cudaPeekAtLastError());
}
|
|
|
|
|