Max pool layer can use stride=2, or stride_x=2 with stride_y=4 (not yet well tested)
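In cfg terms (see the parse_maxpool hunk in src/parser.c below), stride_x and stride_y default to stride when omitted, so a [maxpool] section could look like this hedged example (values chosen only for illustration):

    # unchanged, symmetric pooling
    [maxpool]
    size=2
    stride=2

    # new: independent horizontal/vertical strides
    [maxpool]
    size=2
    stride_x=2
    stride_y=4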
@@ -208,6 +208,8 @@ struct layer {
     int size;
     int side;
     int stride;
+    int stride_x;
+    int stride_y;
     int dilation;
     int maxpool_depth;
     int out_channels;
@@ -207,6 +207,8 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) {
         p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U);
     }

+    // GIoU = I/U - (C-U)/C
+    // C is the smallest convex hull that encloses both Detection and Truth
     if (iou_loss == GIOU) {
         if (C > 0) {
             // apply "C" term from gIOU
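As a side note, the quantity the new comments describe can be written as a tiny standalone helper; this is only a hedged sketch using the intersection I, union U, and enclosing-box area C that dx_box_iou already computes, not code from the commit:

    // Sketch: GIoU = I/U - (C-U)/C, where C is the area of the smallest
    // box enclosing both prediction and truth (assumes U > 0).
    static float giou_from_areas(float I, float U, float C)
    {
        float giou = I / U;
        if (C > 0) giou -= (C - U) / C;  // the "C" term penalizes loose enclosing boxes
        return giou;
    }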
@@ -34,8 +34,8 @@ void cudnn_maxpool_setup(layer *l)
             l->size,
             l->pad/2, //0, //l.pad,
             l->pad/2, //0, //l.pad,
-            l->stride,
-            l->stride);
+            l->stride_x,
+            l->stride_y);

     cudnnCreateTensorDescriptor(&l->srcTensorDesc);
     cudnnCreateTensorDescriptor(&l->dstTensorDesc);
@@ -45,7 +45,7 @@ void cudnn_maxpool_setup(layer *l)
 }


-maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels)
+maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels)
 {
     maxpool_layer l = { (LAYER_TYPE)0 };
     l.type = MAXPOOL;
@@ -62,14 +62,16 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
         l.out_h = l.h;
     }
     else {
-        l.out_w = (w + padding - size) / stride + 1;
-        l.out_h = (h + padding - size) / stride + 1;
+        l.out_w = (w + padding - size) / stride_x + 1;
+        l.out_h = (h + padding - size) / stride_y + 1;
         l.out_c = c;
     }
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = h*w*c;
     l.size = size;
-    l.stride = stride;
+    l.stride = stride_x;
+    l.stride_x = stride_x;
+    l.stride_y = stride_y;
     int output_size = l.out_h * l.out_w * l.out_c * batch;
     l.indexes = (int*)calloc(output_size, sizeof(int));
     l.output = (float*)calloc(output_size, sizeof(float));
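As a worked example of the new output-size formulas (input size and padding chosen purely for illustration): with w = h = 416, size = 2, padding = 1, stride_x = 2 and stride_y = 4,

    int out_w = (416 + 1 - 2) / 2 + 1;  // 415/2 = 207 (integer division), so out_w = 208
    int out_h = (416 + 1 - 2) / 4 + 1;  // 415/4 = 103,                     so out_h = 104

so the asymmetric stride roughly halves the width of the feature map but quarters its height.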
@@ -87,7 +89,11 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s

 #endif // GPU
     l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
-    fprintf(stderr, "max %d x %d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
+    if(stride_x == stride_y)
+        fprintf(stderr, "max %d x %d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
+    else
+        fprintf(stderr, "max %d x %d/%2dx%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, stride_y, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);

     return l;
 }

@@ -97,8 +103,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
     l->w = w;
     l->inputs = h*w*l->c;

-    l->out_w = (w + l->pad - l->size) / l->stride + 1;
-    l->out_h = (h + l->pad - l->size) / l->stride + 1;
+    l->out_w = (w + l->pad - l->size) / l->stride_x + 1;
+    l->out_h = (h + l->pad - l->size) / l->stride_y + 1;
     l->outputs = l->out_w * l->out_h * l->out_c;
     int output_size = l->outputs * l->batch;

@@ -151,7 +157,7 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state)
     }


-    if (!state.train) {
+    if (!state.train && l.stride_x == l.stride_y) {
         forward_maxpool_layer_avx(state.input, l.output, l.indexes, l.size, l.w, l.h, l.out_w, l.out_h, l.c, l.pad, l.stride, l.batch);
         return;
     }
@@ -173,8 +179,8 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state)
                     int max_i = -1;
                     for(n = 0; n < l.size; ++n){
                         for(m = 0; m < l.size; ++m){
-                            int cur_h = h_offset + i*l.stride + n;
-                            int cur_w = w_offset + j*l.stride + m;
+                            int cur_h = h_offset + i*l.stride_y + n;
+                            int cur_w = w_offset + j*l.stride_x + m;
                             int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c));
                             int valid = (cur_h >= 0 && cur_h < l.h &&
                                          cur_w >= 0 && cur_w < l.w);
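Putting the CPU forward pass together, the per-window logic with independent strides boils down to the following self-contained sketch (single image, simplified signature; names are illustrative, not darknet's actual API):

    #include <float.h>

    // Hedged sketch of 2D max pooling with independent strides, mirroring the
    // cur_h/cur_w indexing above. src is h*w*c floats in CHW layout; dst must
    // hold out_h*out_w*c floats. pad is assumed to be the total padding, split
    // evenly between the two sides (as in the l->pad/2 terms earlier).
    static void maxpool_xy(const float *src, float *dst, int w, int h, int c,
                           int size, int stride_x, int stride_y, int pad)
    {
        int out_w = (w + pad - size) / stride_x + 1;
        int out_h = (h + pad - size) / stride_y + 1;
        int w_offset = -pad / 2;
        int h_offset = -pad / 2;
        for (int k = 0; k < c; ++k) {
            for (int i = 0; i < out_h; ++i) {
                for (int j = 0; j < out_w; ++j) {
                    float max = -FLT_MAX;
                    for (int n = 0; n < size; ++n) {
                        for (int m = 0; m < size; ++m) {
                            int cur_h = h_offset + i*stride_y + n;
                            int cur_w = w_offset + j*stride_x + m;
                            int valid = (cur_h >= 0 && cur_h < h &&
                                         cur_w >= 0 && cur_w < w);
                            float val = valid ? src[cur_w + w*(cur_h + h*k)] : -FLT_MAX;
                            if (val > max) max = val;
                        }
                    }
                    dst[j + out_w*(i + out_h*k)] = max;
                }
            }
        }
    }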
@@ -12,7 +12,7 @@ typedef layer maxpool_layer;
 extern "C" {
 #endif
 image get_maxpool_image(maxpool_layer l);
-maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels);
+maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels);
 void resize_maxpool_layer(maxpool_layer *l, int w, int h);
 void forward_maxpool_layer(const maxpool_layer l, network_state state);
 void backward_maxpool_layer(const maxpool_layer l, network_state state);
@@ -49,10 +49,10 @@ __global__ void backward_maxpool_depth_layer_kernel(int n, int w, int h, int c,
 }


-__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
+__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *input, float *output, int *indexes)
 {
-    int h = (in_h + pad - size) / stride + 1;
-    int w = (in_w + pad - size) / stride + 1;
+    int h = (in_h + pad - size) / stride_y + 1;
+    int w = (in_w + pad - size) / stride_x + 1;
     int c = in_c;

     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@@ -75,8 +75,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
     int l, m;
     for(l = 0; l < size; ++l){
        for(m = 0; m < size; ++m){
-            int cur_h = h_offset + i*stride + l;
-            int cur_w = w_offset + j*stride + m;
+            int cur_h = h_offset + i*stride_y + l;
+            int cur_w = w_offset + j*stride_x + m;
             int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c));
             int valid = (cur_h >= 0 && cur_h < in_h &&
                          cur_w >= 0 && cur_w < in_w);
@@ -89,12 +89,13 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
     indexes[out_index] = max_i;
 }

-__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
+__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *delta, float *prev_delta, int *indexes)
 {
-    int h = (in_h + pad - size) / stride + 1;
-    int w = (in_w + pad - size) / stride + 1;
+    int h = (in_h + pad - size) / stride_y + 1;
+    int w = (in_w + pad - size) / stride_x + 1;
     int c = in_c;
-    int area = (size-1)/stride;
+    int area_x = (size - 1) / stride_x;
+    int area_y = (size - 1) / stride_y;

     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
     if(id >= n) return;
@@ -113,10 +114,10 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_

     float d = 0;
     int l, m;
-    for(l = -area; l < area+1; ++l){
-        for(m = -area; m < area+1; ++m){
-            int out_w = (j-w_offset)/stride + m;
-            int out_h = (i-h_offset)/stride + l;
+    for(l = -area_y; l < area_y+1; ++l){
+        for(m = -area_x; m < area_x+1; ++m){
+            int out_w = (j-w_offset)/stride_x + m;
+            int out_h = (i-h_offset)/stride_y + l;
             int out_index = out_w + w*(out_h + h*(k + c*b));
             int valid = (out_w >= 0 && out_w < w &&
                          out_h >= 0 && out_h < h);
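For intuition about the search radius used above (a hedged worked example, numbers purely illustrative): area_x and area_y bound how many output columns/rows away a pooling window can start and still cover a given input pixel.

    int area_a = (2 - 1) / 2;  // size=2, stride_x=2 -> 0: windows never overlap horizontally
    int area_b = (2 - 1) / 4;  // size=2, stride_y=4 -> 0: nor vertically, so only the nominal cell is checked
    int area_c = (3 - 1) / 2;  // size=3, stride=2   -> 1: overlapping windows, the neighbouring cells are checked too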
@@ -172,7 +173,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta

     size_t n = h*w*c*layer.batch;

-    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream()>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
+    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream()>>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }

@@ -192,6 +193,6 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st

     size_t n = layer.h*layer.w*layer.c*layer.batch;

-    backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
+    backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
src/parser.c (14 changed lines)
@@ -535,6 +535,8 @@ layer parse_reorg_old(list *options, size_params params)
 maxpool_layer parse_maxpool(list *options, size_params params)
 {
     int stride = option_find_int(options, "stride",1);
+    int stride_x = option_find_int_quiet(options, "stride_x", stride);
+    int stride_y = option_find_int_quiet(options, "stride_y", stride);
     int size = option_find_int(options, "size",stride);
     int padding = option_find_int_quiet(options, "padding", size-1);
     int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0);
@@ -547,7 +549,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
     batch=params.batch;
     if(!(h && w && c)) error("Layer before maxpool layer must output image.");

-    maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride, padding, maxpool_depth, out_channels);
+    maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels);
     return layer;
 }

@@ -1332,12 +1334,12 @@ void load_convolutional_weights(layer l, FILE *fp)
         //return;
     }
     int num = l.nweights;
-    fread(l.biases, sizeof(float), l.n, fp);
+    if (fread(l.biases, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n");
     //fread(l.weights, sizeof(float), num, fp); // as in connected layer
     if (l.batch_normalize && (!l.dontloadscales)){
-        fread(l.scales, sizeof(float), l.n, fp);
-        fread(l.rolling_mean, sizeof(float), l.n, fp);
-        fread(l.rolling_variance, sizeof(float), l.n, fp);
+        if(fread(l.scales, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n");
+        if(fread(l.rolling_mean, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n");
+        if(fread(l.rolling_variance, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n");
         if(0){
             int i;
             for(i = 0; i < l.n; ++i){
@@ -1354,7 +1356,7 @@ void load_convolutional_weights(layer l, FILE *fp)
             fill_cpu(l.n, 0, l.rolling_variance, 1);
         }
     }
-    fread(l.weights, sizeof(float), num, fp);
+    if(fread(l.weights, sizeof(float), num, fp) < num) printf("\n Warning: Unexpected end of wights-file! \n");
     //if(l.adam){
     //    fread(l.m, sizeof(float), num, fp);
     //    fread(l.v, sizeof(float), num, fp);
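The same check-and-warn pattern is now repeated for every fread in this function; a hedged sketch of a helper that would factor it out (name and message are illustrative, not part of the commit):

    #include <stdio.h>

    // Read `count` floats and warn on a short read, as the new inline checks do.
    static size_t fread_or_warn(float *dst, size_t count, FILE *fp)
    {
        size_t got = fread(dst, sizeof(float), count, fp);
        if (got < count) fprintf(stderr, "\n Warning: Unexpected end of weights-file! \n");
        return got;
    }

    // usage: fread_or_warn(l.biases, l.n, fp);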