diff --git a/include/darknet.h b/include/darknet.h index 5d87a832..5cfd274d 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -208,6 +208,8 @@ struct layer { int size; int side; int stride; + int stride_x; + int stride_y; int dilation; int maxpool_depth; int out_channels; diff --git a/src/box.c b/src/box.c index 640f54a2..1b5c4998 100644 --- a/src/box.c +++ b/src/box.c @@ -207,6 +207,8 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U); } + // GIoU = I/U - (C-U)/C + // C is the smallest convex hull that encloses both Detection and Truth if (iou_loss == GIOU) { if (C > 0) { // apply "C" term from gIOU diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 4d2ee49f..000efe90 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -34,8 +34,8 @@ void cudnn_maxpool_setup(layer *l) l->size, l->pad/2, //0, //l.pad, l->pad/2, //0, //l.pad, - l->stride, - l->stride); + l->stride_x, + l->stride_y); cudnnCreateTensorDescriptor(&l->srcTensorDesc); cudnnCreateTensorDescriptor(&l->dstTensorDesc); @@ -45,7 +45,7 @@ void cudnn_maxpool_setup(layer *l) } -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels) +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels) { maxpool_layer l = { (LAYER_TYPE)0 }; l.type = MAXPOOL; @@ -62,14 +62,16 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s l.out_h = l.h; } else { - l.out_w = (w + padding - size) / stride + 1; - l.out_h = (h + padding - size) / stride + 1; + l.out_w = (w + padding - size) / stride_x + 1; + l.out_h = (h + padding - size) / stride_y + 1; l.out_c = c; } l.outputs = l.out_h * l.out_w * l.out_c; l.inputs = h*w*c; l.size = size; - l.stride = stride; + l.stride = stride_x; + l.stride_x = stride_x; + l.stride_y = stride_y; int 
output_size = l.out_h * l.out_w * l.out_c * batch; l.indexes = (int*)calloc(output_size, sizeof(int)); l.output = (float*)calloc(output_size, sizeof(float)); @@ -87,7 +89,11 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s #endif // GPU l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; - fprintf(stderr, "max %d x %d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + if(stride_x == stride_y) + fprintf(stderr, "max %d x %d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + else + fprintf(stderr, "max %d x %d/%2dx%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, stride_y, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + return l; } @@ -97,8 +103,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h) l->w = w; l->inputs = h*w*l->c; - l->out_w = (w + l->pad - l->size) / l->stride + 1; - l->out_h = (h + l->pad - l->size) / l->stride + 1; + l->out_w = (w + l->pad - l->size) / l->stride_x + 1; + l->out_h = (h + l->pad - l->size) / l->stride_y + 1; l->outputs = l->out_w * l->out_h * l->out_c; int output_size = l->outputs * l->batch; @@ -151,7 +157,7 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state) } - if (!state.train) { + if (!state.train && l.stride_x == l.stride_y) { forward_maxpool_layer_avx(state.input, l.output, l.indexes, l.size, l.w, l.h, l.out_w, l.out_h, l.c, l.pad, l.stride, l.batch); return; } @@ -173,8 +179,8 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state) int max_i = -1; for(n = 0; n < l.size; ++n){ for(m = 0; m < l.size; ++m){ - int cur_h = h_offset + i*l.stride + n; - int cur_w = w_offset + j*l.stride + m; + int cur_h = h_offset + i*l.stride_y + n; + int cur_w = w_offset + j*l.stride_x + m; int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); int valid = (cur_h >= 0 && cur_h < l.h && cur_w >= 0 && cur_w 
< l.w); diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index 0c1f6148..4994d457 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -12,7 +12,7 @@ typedef layer maxpool_layer; extern "C" { #endif image get_maxpool_image(maxpool_layer l); -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels); void resize_maxpool_layer(maxpool_layer *l, int w, int h); void forward_maxpool_layer(const maxpool_layer l, network_state state); void backward_maxpool_layer(const maxpool_layer l, network_state state); diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index 82d631b3..8e851100 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -49,10 +49,10 @@ __global__ void backward_maxpool_depth_layer_kernel(int n, int w, int h, int c, } -__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) +__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *input, float *output, int *indexes) { - int h = (in_h + pad - size) / stride + 1; - int w = (in_w + pad - size) / stride + 1; + int h = (in_h + pad - size) / stride_y + 1; + int w = (in_w + pad - size) / stride_x + 1; int c = in_c; int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; @@ -75,8 +75,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c int l, m; for(l = 0; l < size; ++l){ for(m = 0; m < size; ++m){ - int cur_h = h_offset + i*stride + l; - int cur_w = w_offset + j*stride + m; + int cur_h = h_offset + i*stride_y + l; + int cur_w = w_offset + j*stride_x + m; int index = cur_w + in_w*(cur_h + in_h*(k + 
b*in_c)); int valid = (cur_h >= 0 && cur_h < in_h && cur_w >= 0 && cur_w < in_w); @@ -89,12 +89,13 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c indexes[out_index] = max_i; } -__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) +__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *delta, float *prev_delta, int *indexes) { - int h = (in_h + pad - size) / stride + 1; - int w = (in_w + pad - size) / stride + 1; + int h = (in_h + pad - size) / stride_y + 1; + int w = (in_w + pad - size) / stride_x + 1; int c = in_c; - int area = (size-1)/stride; + int area_x = (size - 1) / stride_x; + int area_y = (size - 1) / stride_y; int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if(id >= n) return; @@ -113,10 +114,10 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_ float d = 0; int l, m; - for(l = -area; l < area+1; ++l){ - for(m = -area; m < area+1; ++m){ - int out_w = (j-w_offset)/stride + m; - int out_h = (i-h_offset)/stride + l; + for(l = -area_y; l < area_y+1; ++l){ + for(m = -area_x; m < area_x+1; ++m){ + int out_w = (j-w_offset)/stride_x + m; + int out_h = (i-h_offset)/stride_y + l; int out_index = out_w + w*(out_h + h*(k + c*b)); int valid = (out_w >= 0 && out_w < w && out_h >= 0 && out_h < h); @@ -172,7 +173,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta size_t n = h*w*c*layer.batch; - forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); + forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -192,6 
+193,6 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st size_t n = layer.h*layer.w*layer.c*layer.batch; - backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu); + backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu); CHECK_CUDA(cudaPeekAtLastError()); } diff --git a/src/parser.c b/src/parser.c index 09e79d2d..ac8f9613 100644 --- a/src/parser.c +++ b/src/parser.c @@ -535,6 +535,8 @@ layer parse_reorg_old(list *options, size_params params) maxpool_layer parse_maxpool(list *options, size_params params) { int stride = option_find_int(options, "stride",1); + int stride_x = option_find_int_quiet(options, "stride_x", stride); + int stride_y = option_find_int_quiet(options, "stride_y", stride); int size = option_find_int(options, "size",stride); int padding = option_find_int_quiet(options, "padding", size-1); int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0); @@ -547,7 +549,7 @@ maxpool_layer parse_maxpool(list *options, size_params params) batch=params.batch; if(!(h && w && c)) error("Layer before maxpool layer must output image."); - maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride, padding, maxpool_depth, out_channels); + maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels); return layer; } @@ -1332,12 +1334,12 @@ void load_convolutional_weights(layer l, FILE *fp) //return; } int num = l.nweights; - fread(l.biases, sizeof(float), l.n, fp); + if (fread(l.biases, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of weights-file! 
\n"); //fread(l.weights, sizeof(float), num, fp); // as in connected layer if (l.batch_normalize && (!l.dontloadscales)){ - fread(l.scales, sizeof(float), l.n, fp); - fread(l.rolling_mean, sizeof(float), l.n, fp); - fread(l.rolling_variance, sizeof(float), l.n, fp); + if(fread(l.scales, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of weights-file! \n"); + if(fread(l.rolling_mean, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of weights-file! \n"); + if(fread(l.rolling_variance, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of weights-file! \n"); if(0){ int i; for(i = 0; i < l.n; ++i){ @@ -1354,7 +1356,7 @@ void load_convolutional_weights(layer l, FILE *fp) fill_cpu(l.n, 0, l.rolling_variance, 1); } } - fread(l.weights, sizeof(float), num, fp); + if(fread(l.weights, sizeof(float), num, fp) < num) printf("\n Warning: Unexpected end of weights-file! \n"); //if(l.adam){ // fread(l.m, sizeof(float), num, fp); // fread(l.v, sizeof(float), num, fp);