diff --git a/src/darknet.c b/src/darknet.c
index 068c9e02..1dc073bc 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -377,6 +377,7 @@ int main(int argc, char **argv)
 #else
     if(gpu_index >= 0){
         cuda_set_device(gpu_index);
+        check_error(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
     }
 #endif
 
diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c
index 0eeb467f..928102f5 100644
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@@ -27,8 +27,8 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
     l.w = w;
     l.c = c;
     l.pad = padding;
-    l.out_w = (w + 2 * padding - size) / stride + 1;
-    l.out_h = (h + 2 * padding - size) / stride + 1;
+    l.out_w = (w + padding - size) / stride + 1;
+    l.out_h = (h + padding - size) / stride + 1;
     l.out_c = c;
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = h*w*c;
@@ -58,8 +58,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
     l->w = w;
     l->inputs = h*w*l->c;
 
-    l->out_w = (w + 2 * l->pad - l->size) / l->stride + 1;
-    l->out_h = (h + 2 * l->pad - l->size) / l->stride + 1;
+    l->out_w = (w + l->pad - l->size) / l->stride + 1;
+    l->out_h = (h + l->pad - l->size) / l->stride + 1;
     l->outputs = l->out_w * l->out_h * l->c;
     int output_size = l->outputs * l->batch;
 
@@ -80,8 +80,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
 
 void forward_maxpool_layer(const maxpool_layer l, network_state state)
 {
     int b,i,j,k,m,n;
-    int w_offset = -l.pad;
-    int h_offset = -l.pad;
+    int w_offset = -l.pad / l.stride;
+    int h_offset = -l.pad / l.stride;
     int h = l.out_h;
     int w = l.out_w;
diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu
index 05b1f9ab..78b7f397 100644
--- a/src/maxpool_layer_kernels.cu
+++ b/src/maxpool_layer_kernels.cu
@@ -9,8 +9,8 @@ extern "C" {
 
 __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
 {
-    int h = (in_h + 2 * pad - size) / stride + 1;
-    int w = (in_w + 2 * pad - size) / stride + 1;
+    int h = (in_h + pad - size) / stride + 1;
+    int w = (in_w + pad - size) / stride + 1;
     int c = in_c;
 
     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@@ -24,8 +24,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
     id /= c;
     int b = id;
 
-    int w_offset = -pad;
-    int h_offset = -pad;
+    int w_offset = -pad / 2;
+    int h_offset = -pad / 2;
 
     int out_index = j + w*(i + h*(k + c*b));
     float max = -INFINITY;
@@ -49,8 +49,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
 
 __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
 {
-    int h = (in_h + 2 * pad - size) / stride + 1;
-    int w = (in_w + 2 * pad - size) / stride + 1;
+    int h = (in_h + pad - size) / stride + 1;
+    int w = (in_w + pad - size) / stride + 1;
     int c = in_c;
     int area = (size-1)/stride;
@@ -66,8 +66,8 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
     id /= in_c;
     int b = id;
 
-    int w_offset = -pad;
-    int h_offset = -pad;
+    int w_offset = -pad / 2;
+    int h_offset = -pad / 2;
 
     float d = 0;
     int l, m;
diff --git a/src/parser.c b/src/parser.c
index d91b1cab..c716ea9a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -457,7 +457,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
 {
     int stride = option_find_int(options, "stride",1);
     int size = option_find_int(options, "size",stride);
-    int padding = option_find_int_quiet(options, "padding", (size-1)/2);
+    int padding = option_find_int_quiet(options, "padding", size-1);
 
     int batch,h,w,c;
     h = params.h;
@@ -511,7 +511,7 @@ layer parse_batchnorm(list *options, size_params params)
 
 layer parse_shortcut(list *options, size_params params, network net)
 {
-    char *l = option_find(options, "from");
+    char *l = option_find(options, "from");
     int index = atoi(l);
     if(index < 0) index = params.index + index;
 
@@ -555,7 +555,7 @@ layer parse_upsample(list *options, size_params params, network net)
 
 route_layer parse_route(list *options, size_params params, network net)
 {
-    char *l = option_find(options, "layers");
+    char *l = option_find(options, "layers");
     int len = strlen(l);
     if(!l) error("Route Layer must specify input layers");
     int n = 1;
@@ -654,8 +654,8 @@ void parse_net_options(list *options, network *net)
         net->step = option_find_int(options, "step", 1);
         net->scale = option_find_float(options, "scale", 1);
     } else if (net->policy == STEPS){
-        char *l = option_find(options, "steps");
-        char *p = option_find(options, "scales");
+        char *l = option_find(options, "steps");
+        char *p = option_find(options, "scales");
         if(!l || !p) error("STEPS policy must have steps and scales in cfg file");
 
         int len = strlen(l);
@@ -808,7 +808,7 @@ network parse_network_cfg_custom(char *filename, int batch)
             params.inputs = l.outputs;
         }
         if (l.bflops > 0) bflops += l.bflops;
-    }
+    }
     free_list(sections);
     net.outputs = get_network_output_size(net);
     net.output = get_network_output(net);