2015-01-23 03:38:24 +03:00
extern "C" {
#include "crop_layer.h"
#include "cuda.h"
}
#define BLOCK 256
/*
 * Copy a crop_width x crop_height window out of every (batch, channel) plane
 * of `input` into `output`, one output element per thread.
 *
 * input       - source buffer, indexed as col + w*(row + h*(channel + c*batch))
 * size        - number of output elements; threads with an index >= size exit
 * c, h, w     - channel count, height and width used to index `input`
 * crop_height - height of the output window
 * crop_width  - width of the output window
 * dh, dw      - row / column offset of the window inside each input plane
 * flip        - non-zero reads columns mirrored about the input width
 * output      - destination buffer, written at the thread's linear index
 *
 * The linear thread index is built from blockIdx.x, blockIdx.y, gridDim.x,
 * blockDim.x and threadIdx.x, i.e. a 2-D grid of 1-D blocks is flattened.
 */
__global__ void forward_crop_layer_kernel(float *input, int size, int c, int h, int w, int crop_height, int crop_width, int dh, int dw, int flip, float *output)
{
    const int out_index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(out_index >= size) return;

    /* Peel the output coordinates off the linear index, fastest-varying
     * dimension first: column, row, channel, then batch. */
    int rest = out_index;
    const int out_col = rest % crop_width;
    rest /= crop_width;
    const int out_row = rest % crop_height;
    rest /= crop_height;
    const int channel = rest % c;
    const int batch = rest / c;

    /* Map the output column to the input column, optionally mirrored. */
    int in_col;
    if(flip){
        in_col = w - dw - out_col - 1;
    } else {
        in_col = out_col + dw;
    }
    const int in_row = out_row + dh;
    const int plane = channel + c*batch;

    output[out_index] = input[in_col + w*(in_row + h*plane)];
}
2015-03-12 08:20:15 +03:00
/*
 * Run the crop layer on the GPU: choose a crop offset and flip flag, then
 * launch forward_crop_layer_kernel to fill layer.output_gpu from state.input.
 *
 * layer - crop layer description; reads batch, c, h, w, crop_height,
 *         crop_width, flip and output_gpu.
 * state - network state; reads train and input.
 *
 * When state.train is non-zero the offsets are drawn with rand() from
 * [0, h - crop_height] and [0, w - crop_width], and the flip flag is drawn
 * with rand() only if layer.flip is set. Otherwise the window is centred and
 * flipping is disabled.
 *
 * NOTE(review): assumes crop_height <= h and crop_width <= w; a modulus of
 * zero in the rand() expressions below would be a division by zero.
 */
extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
{
    /* The random draws are made unconditionally (and overwritten below when
     * not training) so the rand() sequence does not depend on state.train. */
    int flip = (layer.flip && rand()%2);
    int dh = rand()%(layer.h - layer.crop_height + 1);
    int dw = rand()%(layer.w - layer.crop_width + 1);

    if(!state.train){
        /* Deterministic centre crop with no mirroring. */
        flip = 0;
        dh = (layer.h - layer.crop_height)/2;
        dw = (layer.w - layer.crop_width)/2;
    }

    /* Total number of output elements; the kernel handles one per thread. */
    int size = layer.batch*layer.c*layer.crop_width*layer.crop_height;

    /* cuda_gridsize() is declared outside this file; presumably it returns a
     * grid with enough BLOCK-sized blocks to cover `size` threads. */
    forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.c, layer.h, layer.w,
            layer.crop_height, layer.crop_width, dh, dw, flip, layer.output_gpu);

    /* Kernel launches do not return a status; check the launch here. */
    check_error(cudaPeekAtLastError());
}