diff --git a/Makefile b/Makefile index 1f310448..b1f93c4c 100644 --- a/Makefile +++ b/Makefile @@ -68,8 +68,8 @@ EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) OBJS = $(addprefix $(OBJDIR), $(OBJ)) DEPS = $(wildcard src/*.h) Makefile include/darknet.h -#all: obj backup results $(SLIB) $(ALIB) $(EXEC) -all: obj results $(SLIB) $(ALIB) $(EXEC) +all: obj backup results $(SLIB) $(ALIB) $(EXEC) +#all: obj results $(SLIB) $(ALIB) $(EXEC) $(EXEC): $(EXECOBJ) $(ALIB) diff --git a/cfg/alexnet.cfg b/cfg/alexnet.cfg index 7e5a9b26..0458735f 100644 --- a/cfg/alexnet.cfg +++ b/cfg/alexnet.cfg @@ -90,6 +90,3 @@ activation=linear [softmax] groups=1 -[cost] -type=sse - diff --git a/cfg/cifar.cfg b/cfg/cifar.cfg index c10fae57..b2f69f53 100644 --- a/cfg/cifar.cfg +++ b/cfg/cifar.cfg @@ -119,6 +119,3 @@ activation=leaky [softmax] groups=1 - -[cost] - diff --git a/cfg/cifar.test.cfg b/cfg/cifar.test.cfg index d3afcdd7..18b6c54c 100644 --- a/cfg/cifar.test.cfg +++ b/cfg/cifar.test.cfg @@ -115,5 +115,3 @@ activation=leaky groups=1 temperature=3 -[cost] - diff --git a/cfg/darknet19.cfg b/cfg/darknet19.cfg index f56a46e2..28ac9669 100644 --- a/cfg/darknet19.cfg +++ b/cfg/darknet19.cfg @@ -203,6 +203,3 @@ activation=linear [softmax] groups=1 -[cost] -type=sse - diff --git a/cfg/darknet19_448.cfg b/cfg/darknet19_448.cfg index 133c688e..c6df7306 100644 --- a/cfg/darknet19_448.cfg +++ b/cfg/darknet19_448.cfg @@ -195,6 +195,3 @@ activation=linear [softmax] groups=1 -[cost] -type=sse - diff --git a/cfg/densenet201.cfg b/cfg/densenet201.cfg index 5e1e7dd1..65b4aecc 100644 --- a/cfg/densenet201.cfg +++ b/cfg/densenet201.cfg @@ -1949,6 +1949,3 @@ activation=linear [softmax] groups=1 -[cost] -type=sse - diff --git a/cfg/extraction.cfg b/cfg/extraction.cfg index 94e10675..e18d5c0d 100644 --- a/cfg/extraction.cfg +++ b/cfg/extraction.cfg @@ -201,6 +201,3 @@ activation=leaky [softmax] groups=1 -[cost] -type=sse - diff --git a/cfg/extraction22k.cfg b/cfg/extraction22k.cfg index 4cec6da9..b5f54090 100644 --- a/cfg/extraction22k.cfg +++ b/cfg/extraction22k.cfg @@ -204,6 +204,3 @@ activation=leaky [softmax] groups=1 -[cost] -type=sse - diff --git a/cfg/go.cfg b/cfg/go.cfg index 77d20c4d..c730092f 100644 --- a/cfg/go.cfg +++ b/cfg/go.cfg @@ -130,6 +130,3 @@ stride=1 [softmax] -[cost] -type=sse - diff --git a/cfg/go.test.cfg b/cfg/go.test.cfg index ba15c2e5..1e4e4380 100644 --- a/cfg/go.test.cfg +++ b/cfg/go.test.cfg @@ -129,6 +129,4 @@ stride=1 [softmax] -[cost] -type=sse diff --git a/cfg/gru.cfg b/cfg/gru.cfg index a68d3fc5..4ce9b180 100644 --- a/cfg/gru.cfg +++ b/cfg/gru.cfg @@ -27,6 +27,4 @@ activation=linear [softmax] -[cost] -type=sse diff --git a/cfg/resnet152.cfg b/cfg/resnet152.cfg index d5fe9094..e8e3297a 100644 --- a/cfg/resnet152.cfg +++ b/cfg/resnet152.cfg @@ -1458,6 +1458,3 @@ activation=linear [softmax] groups=1 -[cost] -type=sse - diff --git a/cfg/resnet50.cfg b/cfg/resnet50.cfg index bfe69b18..8c4d01fe 100644 --- a/cfg/resnet50.cfg +++ b/cfg/resnet50.cfg @@ -506,6 +506,4 @@ activation=linear [softmax] groups=1 -[cost] -type=sse diff --git a/cfg/rnn.cfg b/cfg/rnn.cfg index 68c032d2..61b202f3 100644 --- a/cfg/rnn.cfg +++ b/cfg/rnn.cfg @@ -35,6 +35,4 @@ activation=leaky [softmax] -[cost] -type=sse diff --git a/cfg/rnn.train.cfg b/cfg/rnn.train.cfg index 9139757f..b9748990 100644 --- a/cfg/rnn.train.cfg +++ b/cfg/rnn.train.cfg @@ -35,6 +35,4 @@ activation=leaky [softmax] -[cost] -type=sse diff --git a/cfg/strided.cfg b/cfg/strided.cfg index a52700b4..2f745085 100644 --- a/cfg/strided.cfg +++ b/cfg/strided.cfg @@ -180,6 +180,3 @@ activation=ramp [softmax] -[cost] -type=sse - diff --git a/cfg/tiny.cfg b/cfg/tiny.cfg index ba686f56..f97327cf 100644 --- a/cfg/tiny.cfg +++ b/cfg/tiny.cfg @@ -171,6 +171,4 @@ activation=linear [softmax] groups=1 -[cost] -type=sse diff --git a/cfg/vgg-16.cfg b/cfg/vgg-16.cfg index 2b6f7029..79694b87 100644 --- a/cfg/vgg-16.cfg +++ b/cfg/vgg-16.cfg @@ -148,6 +148,4 @@ activation=linear [softmax] groups=1 -[cost] -type=sse diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 8fa2ab2e..4a1047b8 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -111,9 +111,13 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network net) float *a = l.weights_gpu + j*l.nweights/l.groups; float *b = net.workspace; float *c = l.output_gpu + (i*l.groups + j)*n*m; + float *im = net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w; - im2col_gpu(net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w, - l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + if (l.size == 1){ + b = im; + } else { + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); } } @@ -236,22 +240,26 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network net) float *b = net.workspace; float *c = l.weight_updates_gpu + j*l.nweights/l.groups; - float *im = net.input_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *im = net.input_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; - im2col_gpu(im, l.c/l.groups, l.h, l.w, - l.size, l.stride, l.pad, b); + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); - if(net.delta_gpu){ - if(l.binary || l.xnor) swap_binary(&l); + if (net.delta_gpu) { + if (l.binary || l.xnor) swap_binary(&l); a = l.weights_gpu + j*l.nweights/l.groups; b = l.delta_gpu + (i*l.groups + j)*m*k; c = net.workspace; + if (l.size == 1) { + c = imd; + } gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k); - col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, - l.pad, net.delta_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w); + if (l.size != 1) { + col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } if(l.binary || l.xnor) { swap_binary(&l); } diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index e4fb9bde..5ac9ef0d 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -463,9 +463,13 @@ void forward_convolutional_layer(convolutional_layer l, network net) float *a = l.weights + j*l.nweights/l.groups; float *b = net.workspace; float *c = l.output + (i*l.groups + j)*n*m; + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; - im2col_cpu(net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w, - l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + if (l.size == 1) { + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); } } @@ -501,21 +505,31 @@ void backward_convolutional_layer(convolutional_layer l, network net) float *b = net.workspace; float *c = l.weight_updates + j*l.nweights/l.groups; - float *im = net.input+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if(l.size == 1){ + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, + l.size, l.stride, l.pad, b); + } - im2col_cpu(im, l.c/l.groups, l.h, l.w, - l.size, l.stride, l.pad, b); gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); - if(net.delta){ + if (net.delta) { a = l.weights + j*l.nweights/l.groups; b = l.delta + (i*l.groups + j)*m*k; c = net.workspace; + if (l.size == 1) { + c = imd; + } gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); - col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, - l.pad, net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w); + if (l.size != 1) { + col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } } } }