From d4402d29c2845a9aef97f9394dc619986ffbdf0c Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sun, 30 Jun 2019 13:53:11 +0300 Subject: [PATCH] Added maxpool_depth= and out_channels= params to [maxpool] --- include/darknet.h | 2 + src/maxpool_layer.c | 53 ++++++++++++++++++++++++--- src/maxpool_layer.h | 2 +- src/maxpool_layer_kernels.cu | 71 +++++++++++++++++++++++++++++++++++- src/parser.c | 4 +- 5 files changed, 123 insertions(+), 9 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index eefe4df4..1584a362 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -208,6 +208,8 @@ struct layer { int side; int stride; int dilation; + int maxpool_depth; + int out_channels; int reverse; int flatten; int spatial; diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 00cb0473..4d2ee49f 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -45,7 +45,7 @@ void cudnn_maxpool_setup(layer *l) } -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels) { maxpool_layer l = { (LAYER_TYPE)0 }; l.type = MAXPOOL; @@ -54,9 +54,18 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s l.w = w; l.c = c; l.pad = padding; - l.out_w = (w + padding - size) / stride + 1; - l.out_h = (h + padding - size) / stride + 1; - l.out_c = c; + l.maxpool_depth = maxpool_depth; + l.out_channels = out_channels; + if (maxpool_depth) { + l.out_c = out_channels; + l.out_w = l.w; + l.out_h = l.h; + } + else { + l.out_w = (w + padding - size) / stride + 1; + l.out_h = (h + padding - size) / stride + 1; + l.out_c = c; + } l.outputs = l.out_h * l.out_w * l.out_c; l.inputs = h*w*c; l.size = size; @@ -90,7 +99,7 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h) l->out_w = (w + l->pad - l->size) / l->stride + 1; l->out_h = (h + l->pad - l->size) / l->stride + 1; - 
l->outputs = l->out_w * l->out_h * l->c; + l->outputs = l->out_w * l->out_h * l->out_c; int output_size = l->outputs * l->batch; l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int)); @@ -111,6 +120,37 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h) void forward_maxpool_layer(const maxpool_layer l, network_state state) { + if (l.maxpool_depth) + { + int b, i, j, k, g; + for (b = 0; b < l.batch; ++b) { + #pragma omp parallel for + for (i = 0; i < l.h; ++i) { + for (j = 0; j < l.w; ++j) { + for (g = 0; g < l.out_c; ++g) + { + int out_index = j + l.w*(i + l.h*(g + l.out_c*b)); + float max = -FLT_MAX; + int max_i = -1; + + for (k = g; k < l.c; k += l.out_c) + { + int in_index = j + l.w*(i + l.h*(k + l.c*b)); + float val = state.input[in_index]; + + max_i = (val > max) ? in_index : max_i; + max = (val > max) ? val : max; + } + l.output[out_index] = max; + l.indexes[out_index] = max_i; + } + } + } + } + return; + } + + if (!state.train) { forward_maxpool_layer_avx(state.input, l.output, l.indexes, l.size, l.w, l.h, l.out_w, l.out_h, l.c, l.pad, l.stride, l.batch); return; @@ -156,7 +196,8 @@ void backward_maxpool_layer(const maxpool_layer l, network_state state) int i; int h = l.out_h; int w = l.out_w; - int c = l.c; + int c = l.out_c; + #pragma omp parallel for for(i = 0; i < h*w*c*l.batch; ++i){ int index = l.indexes[i]; state.delta[index] += l.delta[i]; diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index 0a90c376..0c1f6148 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -12,7 +12,7 @@ typedef layer maxpool_layer; extern "C" { #endif image get_maxpool_image(maxpool_layer l); -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels); void resize_maxpool_layer(maxpool_layer *l, int w, int h); void forward_maxpool_layer(const maxpool_layer l, 
// Depth-wise max pooling, forward pass.
//
// For each spatial position (j, i) in batch item b and each output group g,
// takes the max over the input channels k = g, g + out_c, g + 2*out_c, ... < c.
// Spatial dimensions are unchanged; output volume is w * h * out_c * batch.
//
// Launch contract (from the host wrapper in this patch): one thread per
// (j, i, b) triple, i.e. n = w * h * batch; each thread iterates over all
// out_c groups serially. The //-commented lines show the alternative
// decomposition with g folded into the thread index, kept for reference.
//
// indexes[] receives the flat input index of each max (argmax), consumed by
// the backward kernel below.
__global__ void forward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int out_c, int batch,
                                                   float *input, float *output, int *indexes)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= n) return;   // tail guard: grid rarely divides n exactly

    int j = id % w;
    id = id / w;
    int i = id % h;
    id = id / h;
    //int g = id % out_c;
    //id = id / out_c;
    int b = id % batch;

    for (int g = 0; g < out_c; ++g)
    {
        int out_index = j + w*(i + h*(g + out_c*b));
        float max = -FLT_MAX;
        int max_i = -1;

        // Stride through the channel dimension by out_c so each group g
        // scans a disjoint residue class of channels.
        for (int k = g; k < c; k += out_c)
        {
            int in_index = j + w*(i + h*(k + c*b));
            float val = input[in_index];

            // Branchless max tracking avoids warp divergence in the scan.
            max_i = (val > max) ? in_index : max_i;
            max   = (val > max) ? val : max;
        }
        output[out_index]  = max;     // pooled value
        indexes[out_index] = max_i;   // argmax input index for backward pass
    }
}

// Depth-wise max pooling, backward pass.
//
// One thread per output element (n = w * h * out_c * batch): scatters each
// output gradient delta[id] onto the input element whose flat index the
// forward kernel recorded in indexes[id].
//
// w, h, c, and batch are unused but kept for signature symmetry with the
// forward kernel. The forward kernel's argmax indices are distinct across
// output elements (each group scans a disjoint channel stride), so the
// unguarded += does not race within this kernel.
__global__ void backward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int batch,
                                                    float *delta, float *prev_delta, int *indexes)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= n) return;

    int index = indexes[id];
    prev_delta[index] += delta[id];
}
backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu); diff --git a/src/parser.c b/src/parser.c index 5f6658e6..30510132 100644 --- a/src/parser.c +++ b/src/parser.c @@ -534,6 +534,8 @@ maxpool_layer parse_maxpool(list *options, size_params params) int stride = option_find_int(options, "stride",1); int size = option_find_int(options, "size",stride); int padding = option_find_int_quiet(options, "padding", size-1); + int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0); + int out_channels = option_find_int_quiet(options, "out_channels", 1); int batch,h,w,c; h = params.h; @@ -542,7 +544,7 @@ maxpool_layer parse_maxpool(list *options, size_params params) batch=params.batch; if(!(h && w && c)) error("Layer before maxpool layer must output image."); - maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); + maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride, padding, maxpool_depth, out_channels); return layer; }