diff --git a/Makefile b/Makefile index 91164081..04e30f35 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ CFLAGS+= -DGPU LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand endif -OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o +OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o ifeq ($(GPU), 1) OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o coco_kernels.o endif diff --git a/cfg/msr_152.cfg b/cfg/msr_152.cfg index 5d5a3b25..b19c999d 100644 --- a/cfg/msr_152.cfg +++ b/cfg/msr_152.cfg @@ -1,13 +1,16 @@ [net] -batch=256 -subdivisions=16 +batch=128 +subdivisions=8 height=256 width=256 channels=3 momentum=0.9 -decay=0.0005 +decay=0.0001 -learning_rate=0.02 +learning_rate=0.1 +policy=poly +power=4 +max_batches=500000 [crop] crop_height=224 @@ -57,10 +60,22 @@ filters=256 size=1 stride=1 pad=1 -activation=leaky +activation=linear + +[route] +layers=-4 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +activation=linear +filters=256 [shortcut] -from = -4 +from = -3 +activation=leaky [convolutional] batch_normalize=1 @@ -84,11 +99,13 @@ filters=256 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + [convolutional] batch_normalize=1 filters=64 @@ -111,11 +128,13 @@ filters=256 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + ##### Conv 3_x ##### @@ -141,10 +160,24 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear + + +[route] +layers=-4 + +[convolutional] +batch_normalize=1 +size=1 +stride=2 +pad=1 +activation=linear +filters=512 [shortcut] -from = -4 +from = -3 +activation=leaky + [convolutional] batch_normalize=1 @@ -168,11 +201,14 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=128 @@ -195,11 +231,14 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=128 @@ -222,11 +261,14 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=128 @@ -249,11 +291,14 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=128 @@ -276,11 +321,14 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=128 @@ -303,11 +351,14 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=128 @@ -330,11 +381,14 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + ##### Conv 4_x ##### @@ -360,10 +414,24 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear + + +[route] +layers=-4 + +[convolutional] +batch_normalize=1 +size=1 +stride=2 +pad=1 +activation=linear +filters=1024 [shortcut] -from = -4 +from = -3 +activation=leaky + [convolutional] batch_normalize=1 @@ -387,11 +455,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -414,11 +485,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -441,11 +515,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -468,11 +545,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -495,11 +575,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -522,11 +605,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -549,11 +635,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -576,11 +665,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -603,11 +695,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -630,11 +725,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -657,11 +755,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -684,11 +785,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -711,11 +815,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -738,11 +845,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -765,11 +875,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -792,11 +905,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -819,11 +935,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -846,11 +965,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -873,11 +995,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -900,11 +1025,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -927,11 +1055,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -954,11 +1085,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -981,11 +1115,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1008,11 +1145,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1035,11 +1175,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1062,11 +1205,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1089,11 +1235,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1116,11 +1265,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1143,11 +1295,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1170,11 +1325,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1197,11 +1355,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1224,11 +1385,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1251,11 +1415,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1278,11 +1445,14 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + + [convolutional] batch_normalize=1 filters=256 @@ -1305,11 +1475,13 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + ##### Conv 5_x ##### @@ -1335,10 +1507,24 @@ filters=2048 size=1 stride=1 pad=1 -activation=leaky +activation=linear + + +[route] +layers=-4 + +[convolutional] +batch_normalize=1 +size=1 +stride=2 +pad=1 +activation=linear +filters=2048 [shortcut] -from = -4 +from = -3 +activation=leaky + [convolutional] batch_normalize=1 @@ -1362,11 +1548,13 @@ filters=2048 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + [convolutional] batch_normalize=1 filters=512 @@ -1389,11 +1577,13 @@ filters=2048 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky + [avgpool] [connected] diff --git a/cfg/msr_34.cfg b/cfg/msr_34.cfg index e561d45c..5ae23cf5 100644 --- a/cfg/msr_34.cfg +++ b/cfg/msr_34.cfg @@ -12,11 +12,6 @@ policy=poly power=4 max_batches=500000 -#policy=sigmoid -#gamma=.00008 -#step=100000 -#max_batches=200000 - [crop] crop_height=224 crop_width=224 diff --git a/cfg/msr_50.cfg b/cfg/msr_50.cfg index 31685f21..2edd21c1 100644 --- a/cfg/msr_50.cfg +++ b/cfg/msr_50.cfg @@ -1,13 +1,18 @@ [net] batch=128 -subdivisions=4 +subdivisions=8 height=256 width=256 channels=3 momentum=0.9 -decay=0.0005 +decay=0.0001 + +learning_rate=0.05 +policy=poly +power=4 +max_batches=500000 + -learning_rate=0.01 [crop] crop_height=224 @@ -57,10 +62,22 @@ filters=256 size=1 stride=1 pad=1 -activation=leaky +activation=linear + +[route] +layers=-4 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +activation=linear +filters=256 [shortcut] -from = -4 +from = -3 +activation=leaky [convolutional] batch_normalize=1 @@ -84,10 +101,11 @@ filters=256 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [convolutional] batch_normalize=1 @@ -111,10 +129,11 @@ filters=256 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky ##### Conv 3_x ##### @@ -141,10 +160,22 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear + +[route] +layers=-4 + +[convolutional] +batch_normalize=1 +size=1 +stride=2 +pad=1 +activation=linear +filters=512 [shortcut] -from = -4 +from = -3 +activation=leaky [convolutional] batch_normalize=1 @@ -168,10 +199,11 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [convolutional] batch_normalize=1 @@ -195,10 +227,11 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [convolutional] batch_normalize=1 @@ -222,10 +255,11 @@ filters=512 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky ##### Conv 4_x ##### @@ -252,10 +286,23 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear + +[route] +layers=-4 + +[convolutional] +batch_normalize=1 +size=1 +stride=2 +pad=1 +activation=linear +filters=1024 [shortcut] -from = -4 +from = -3 +activation=leaky + [convolutional] batch_normalize=1 @@ -279,10 +326,11 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [convolutional] batch_normalize=1 @@ -306,10 +354,11 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [convolutional] batch_normalize=1 @@ -333,10 +382,11 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [convolutional] batch_normalize=1 @@ -360,10 +410,11 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [convolutional] batch_normalize=1 @@ -387,10 +438,11 @@ filters=1024 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky ##### Conv 5_x ##### @@ -417,10 +469,24 @@ filters=2048 size=1 stride=1 pad=1 -activation=leaky +activation=linear + + +[route] +layers=-4 + +[convolutional] +batch_normalize=1 +size=1 +stride=2 +pad=1 +activation=linear +filters=2048 [shortcut] -from = -4 +from = -3 +activation=leaky + [convolutional] batch_normalize=1 @@ -444,10 +510,11 @@ filters=2048 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [convolutional] batch_normalize=1 @@ -471,10 +538,11 @@ filters=2048 size=1 stride=1 pad=1 -activation=leaky +activation=linear [shortcut] from = -4 +activation=leaky [avgpool] diff --git a/data/dog.jpg b/data/dog.jpg index deadd795..665a81c5 100644 Binary files a/data/dog.jpg and b/data/dog.jpg differ diff --git a/src/activation_layer.c b/src/activation_layer.c new file mode 100644 index 00000000..49e638d4 --- /dev/null +++ b/src/activation_layer.c @@ -0,0 +1,58 @@ +#include "activation_layer.h" +#include "utils.h" +#include "cuda.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer make_activation_layer(int batch, int inputs, ACTIVATION activation) +{ + layer l = {0}; + l.type = ACTIVE; + + l.inputs = inputs; + l.outputs = inputs; + l.batch=batch; + + l.output = calloc(batch*inputs, sizeof(float*)); + l.delta = calloc(batch*inputs, sizeof(float*)); + +#ifdef GPU + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); +#endif + l.activation = activation; + fprintf(stderr, "Activation Layer: %d inputs\n", inputs); + return l; +} + +void forward_activation_layer(layer l, network_state state) +{ + copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer(layer l, network_state state) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1); +} + +#ifdef GPU + +void forward_activation_layer_gpu(layer l, network_state state) +{ + copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); + activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer_gpu(layer l, network_state state) +{ + gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); +} +#endif diff --git a/src/activation_layer.h b/src/activation_layer.h new file mode 100644 index 00000000..a09756aa --- /dev/null +++ b/src/activation_layer.h @@ -0,0 +1,19 @@ +#ifndef ACTIVATION_LAYER_H +#define ACTIVATION_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_activation_layer(int batch, int inputs, ACTIVATION activation); + +void forward_activation_layer(layer l, network_state state); +void backward_activation_layer(layer l, network_state state); + +#ifdef GPU +void forward_activation_layer_gpu(layer l, network_state state); +void backward_activation_layer_gpu(layer l, network_state state); +#endif + +#endif + diff --git a/src/avgpool_layer.c b/src/avgpool_layer.c index 8eccde62..0feae710 100644 --- a/src/avgpool_layer.c +++ b/src/avgpool_layer.c @@ -28,8 +28,9 @@ avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) void resize_avgpool_layer(avgpool_layer *l, int w, int h) { - l->h = h; l->w = w; + l->h = h; + l->inputs = h*w*l->c; } void forward_avgpool_layer(const avgpool_layer l, network_state state) diff --git a/src/blas.c b/src/blas.c index 556603cf..8769df35 100644 --- a/src/blas.c +++ b/src/blas.c @@ -1,15 +1,26 @@ #include "blas.h" #include "math.h" +#include -void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2) +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) { + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? c1 : c2; + int i,j,k,b; for(b = 0; b < batch; ++b){ - for(k = 0; k < c && k < c2; ++k){ - for(j = 0; j < h/sample; ++j){ - for(i = 0; i < w/sample; ++i){ - int out_index = i*sample + w*(j*sample + h*(k + c*b)); - int add_index = b*w*stride/sample*h*stride/sample*c2 + i*stride + w*stride/sample*(j*stride + h*stride/sample*k); + for(k = 0; k < minc; ++k){ + for(j = 0; j < minh; ++j){ + for(i = 0; i < minw; ++i){ + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); out[out_index] += add[add_index]; } } diff --git a/src/blas.h b/src/blas.h index 208fdaa5..aecdc593 100644 --- a/src/blas.h +++ b/src/blas.h @@ -16,7 +16,7 @@ void scal_cpu(int N, float ALPHA, float *X, int INCX); void fill_cpu(int N, float ALPHA, float * X, int INCX); float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); void test_gpu_blas(); -void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2); +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); @@ -45,6 +45,6 @@ void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *varianc void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); -void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); #endif #endif diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 8f05eb94..49406db2 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -1,6 +1,7 @@ #include "cuda_runtime.h" #include "curand.h" #include "cublas_v2.h" +#include extern "C" { #include "blas.h" @@ -374,26 +375,37 @@ extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX) check_error(cudaPeekAtLastError()); } -__global__ void shortcut_kernel(int size, float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2, int min_c) +__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) { int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (id >= size) return; - int i = id % (w/sample); - id /= (w/sample); - int j = id % (h/sample); - id /= (h/sample); - int k = id % min_c; - id /= min_c; - int b = id; - int out_index = i*sample + w*(j*sample + h*(k + c*b)); - int add_index = b*w*stride/sample*h*stride/sample*c2 + i*stride + w*stride/sample*(j*stride + h*stride/sample*k); + int i = id % minw; + id /= minw; + int j = id % minh; + id /= minh; + int k = id % minc; + id /= minc; + int b = id % batch; + + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); out[out_index] += add[add_index]; } -extern "C" void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2) +extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) { - int min_c = (c < c2) ? c : c2; - int size = batch * w/sample * h/sample * min_c; - shortcut_kernel<<>>(size, out, w, h, c, batch, sample, add, stride, c2, min_c); + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? c1 : c2; + + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + + int size = batch * minw * minh * minc; + shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); check_error(cudaPeekAtLastError()); } diff --git a/src/classifier.c b/src/classifier.c index ddd88b19..9924c371 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -2,6 +2,7 @@ #include "utils.h" #include "parser.h" #include "option_list.h" +#include "blas.h" #ifdef OPENCV #include "opencv2/highgui/highgui_c.h" @@ -183,6 +184,145 @@ void validate_classifier(char *datacfg, char *filename, char *weightfile) } } +void validate_classifier_10(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network net = parse_network_cfg(filename); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 256, 256); + image images[10]; + images[0] = crop_image(im, -16, -16, 256, 256); + images[1] = crop_image(im, 16, -16, 256, 256); + images[2] = crop_image(im, 0, 0, 256, 256); + images[3] = crop_image(im, -16, 16, 256, 256); + images[4] = crop_image(im, 16, 16, 256, 256); + flip_image(im); + images[5] = crop_image(im, -16, -16, 256, 256); + images[6] = crop_image(im, 16, -16, 256, 256); + images[7] = crop_image(im, 0, 0, 256, 256); + images[8] = crop_image(im, -16, 16, 256, 256); + images[9] = crop_image(im, 16, 16, 256, 256); + float *pred = calloc(classes, sizeof(float)); + for(j = 0; j < 10; ++j){ + float *p = network_predict(net, images[j].data); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(images[j]); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_multi(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network net = parse_network_cfg(filename); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + int scales[] = {224, 256, 384, 480, 640}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j < nscales; ++j){ + int w, h; + if(im.w < im.h){ + w = scales[j]; + h = (im.h*w)/im.w; + } else { + h = scales[j]; + w = (im.w * h) / im.h; + } + resize_network(&net, w, h); + image r = resize_image(im, w, h); + float *p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename) { network net = parse_network_cfg(cfgfile); @@ -296,7 +436,7 @@ void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_ free_matrix(pred); - fprintf(stderr, "%lf seconds, %d images\n", sec(clock()-time), val.X.rows); + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); free_data(val); } } @@ -319,6 +459,8 @@ void run_classifier(int argc, char **argv) else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights); else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); else if(0==strcmp(argv[2], "valid")) validate_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); } diff --git a/src/connected_layer.c b/src/connected_layer.c index 2d83dd91..c0a9d8b3 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -32,7 +32,7 @@ connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVAT //float scale = 1./sqrt(inputs); float scale = sqrt(2./inputs); for(i = 0; i < outputs*inputs; ++i){ - l.weights[i] = 2*scale*rand_uniform() - scale; + l.weights[i] = scale*rand_uniform(-1, 1); } for(i = 0; i < outputs; ++i){ diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index e97b00d7..871a84e1 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -65,7 +65,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int // float scale = 1./sqrt(size*size*c); float scale = sqrt(2./(size*size*c)); - for(i = 0; i < c*n*size*size; ++i) l.filters[i] = 2*scale*rand_uniform() - scale; + for(i = 0; i < c*n*size*size; ++i) l.filters[i] = scale*rand_uniform(-1, 1); int out_h = convolutional_out_height(l); int out_w = convolutional_out_width(l); l.out_h = out_h; diff --git a/src/crop_layer.c b/src/crop_layer.c index 7b340841..66f11ebc 100644 --- a/src/crop_layer.c +++ b/src/crop_layer.c @@ -19,33 +19,51 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int l.h = h; l.w = w; l.c = c; + l.scale = (float)crop_height / h; l.flip = flip; l.angle = angle; l.saturation = saturation; l.exposure = exposure; - l.crop_width = crop_width; - l.crop_height = crop_height; l.out_w = crop_width; l.out_h = crop_height; l.out_c = c; l.inputs = l.w * l.h * l.c; l.outputs = l.out_w * l.out_h * l.out_c; - l.output = calloc(crop_width*crop_height * c*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float)); #ifdef GPU - l.output_gpu = cuda_make_array(l.output, crop_width*crop_height*c*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); l.rand_gpu = cuda_make_array(0, l.batch*8); #endif return l; } +void resize_crop_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->out_w = l->scale*w; + l->out_h = l->scale*h; + + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + #ifdef GPU + cuda_free(l->output_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + #endif +} + + void forward_crop_layer(const crop_layer l, network_state state) { int i,j,c,b,row,col; int index; int count = 0; int flip = (l.flip && rand()%2); - int dh = rand()%(l.h - l.crop_height + 1); - int dw = rand()%(l.w - l.crop_width + 1); + int dh = rand()%(l.h - l.out_h + 1); + int dw = rand()%(l.w - l.out_w + 1); float scale = 2; float trans = -1; if(l.noadjust){ @@ -54,13 +72,13 @@ void forward_crop_layer(const crop_layer l, network_state state) } if(!state.train){ flip = 0; - dh = (l.h - l.crop_height)/2; - dw = (l.w - l.crop_width)/2; + dh = (l.h - l.out_h)/2; + dw = (l.w - l.out_w)/2; } for(b = 0; b < l.batch; ++b){ for(c = 0; c < l.c; ++c){ - for(i = 0; i < l.crop_height; ++i){ - for(j = 0; j < l.crop_width; ++j){ + for(i = 0; i < l.out_h; ++i){ + for(j = 0; j < l.out_w; ++j){ if(flip){ col = l.w - dw - j - 1; }else{ diff --git a/src/crop_layer.h b/src/crop_layer.h index b4093510..12112f02 100644 --- a/src/crop_layer.h +++ b/src/crop_layer.h @@ -11,6 +11,7 @@ typedef layer crop_layer; image get_crop_image(crop_layer l); crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); void forward_crop_layer(const crop_layer l, network_state state); +void resize_crop_layer(layer *l, int w, int h); #ifdef GPU void forward_crop_layer_gpu(crop_layer l, network_state state); diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu index 8891030a..da635a6f 100644 --- a/src/crop_layer_kernels.cu +++ b/src/crop_layer_kernels.cu @@ -198,9 +198,9 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state) levels_image_kernel<<>>(state.input, layer.rand_gpu, layer.batch, layer.w, layer.h, state.train, layer.saturation, layer.exposure, translate, scale, layer.shift); check_error(cudaPeekAtLastError()); - size = layer.batch*layer.c*layer.crop_width*layer.crop_height; + size = layer.batch*layer.c*layer.out_w*layer.out_h; - forward_crop_layer_kernel<<>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.crop_height, layer.crop_width, state.train, layer.flip, radians, layer.output_gpu); + forward_crop_layer_kernel<<>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, state.train, layer.flip, radians, layer.output_gpu); check_error(cudaPeekAtLastError()); /* diff --git a/src/data.c b/src/data.c index 8d762765..88c89917 100644 --- a/src/data.c +++ b/src/data.c @@ -427,10 +427,10 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl int dw = (ow*jitter); int dh = (oh*jitter); - int pleft = (rand_uniform() * 2*dw - dw); - int pright = (rand_uniform() * 2*dw - dw); - int ptop = (rand_uniform() * 2*dh - dh); - int pbot = (rand_uniform() * 2*dh - dh); + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); int swidth = ow - pleft - pright; int sheight = oh - ptop - pbot; @@ -543,10 +543,10 @@ data load_data_swag(char **paths, int n, int classes, float jitter) int dw = w*jitter; int dh = h*jitter; - int pleft = (rand_uniform() * 2*dw - dw); - int pright = (rand_uniform() * 2*dw - dw); - int ptop = (rand_uniform() * 2*dh - dh); - int pbot = (rand_uniform() * 2*dh - dh); + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); int swidth = w - pleft - pright; int sheight = h - ptop - pbot; @@ -594,10 +594,10 @@ data load_data_detection(int n, char **paths, int m, int classes, int w, int h, int dw = ow/10; int dh = oh/10; - int pleft = (rand_uniform() * 2*dw - dw); - int pright = (rand_uniform() * 2*dw - dw); - int ptop = (rand_uniform() * 2*dh - dh); - int pbot = (rand_uniform() * 2*dh - dh); + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); int swidth = ow - pleft - pright; int sheight = oh - ptop - pbot; diff --git a/src/dropout_layer.c b/src/dropout_layer.c index 97dd47f5..bb410dcf 100644 --- a/src/dropout_layer.c +++ b/src/dropout_layer.c @@ -37,7 +37,7 @@ void forward_dropout_layer(dropout_layer l, network_state state) int i; if (!state.train) return; for(i = 0; i < l.batch * l.inputs; ++i){ - float r = rand_uniform(); + float r = rand_uniform(0, 1); l.rand[i] = r; if(r < l.probability) state.input[i] = 0; else state.input[i] *= l.scale; diff --git a/src/image.c b/src/image.c index 51871b55..d7d57d56 100644 --- a/src/image.c +++ b/src/image.c @@ -390,6 +390,17 @@ image make_image(int w, int h, int c) return out; } +image make_random_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + int i; + for(i = 0; i < w*h*c; ++i){ + out.data[i] = (rand_normal() * .25) + .5; + } + return out; +} + image float_to_image(int w, int h, int c, float *data) { image out = make_empty_image(w,h,c); @@ -692,6 +703,8 @@ image resize_image(image im, int w, int h) return resized; } +#include "cuda.h" + void test_resize(char *filename) { image im = load_image(filename, 0,0, 3); @@ -709,14 +722,27 @@ void test_resize(char *filename) image exp5 = copy_image(im); exposure_image(exp5, .5); - image r = resize_image(im, im.w/2, im.h/2); + #ifdef GPU + image r = resize_image(im, im.w, im.h); + image black = make_image(im.w*2 + 3, im.h*2 + 3, 9); + image black2 = make_image(im.w, im.h, 3); - image black = make_image(im.w, im.h, im.c); - shortcut_cpu(black.data, im.w, im.h, im.c, 1, 2, r.data, 1, r.c); + float *r_gpu = cuda_make_array(r.data, r.w*r.h*r.c); + float *black_gpu = cuda_make_array(black.data, black.w*black.h*black.c); + float *black2_gpu = cuda_make_array(black2.data, black2.w*black2.h*black2.c); + shortcut_gpu(3, r.w, r.h, 1, r_gpu, black.w, black.h, 3, black_gpu); + //flip_image(r); + //shortcut_gpu(3, r.w, r.h, 1, r.data, black.w, black.h, 3, black.data); + + shortcut_gpu(3, black.w, black.h, 3, black_gpu, black2.w, black2.h, 1, black2_gpu); + cuda_pull_array(black_gpu, black.data, black.w*black.h*black.c); + cuda_pull_array(black2_gpu, black2.data, black2.w*black2.h*black2.c); + show_image_layers(black, "Black"); + show_image(black2, "Recreate"); + #endif show_image(im, "Original"); show_image(gray, "Gray"); - show_image(black, "Black"); show_image(sat2, "Saturation-2"); show_image(sat5, "Saturation-.5"); show_image(exp2, "Exposure-2"); diff --git a/src/image.h b/src/image.h index c3e1a78b..4846bc19 100644 --- a/src/image.h +++ b/src/image.h @@ -58,6 +58,7 @@ void save_image_jpg(image p, char *name); void print_image(image m); image make_image(int w, int h, int c); +image make_random_image(int w, int h, int c); image make_empty_image(int w, int h, int c); image float_to_image(int w, int h, int c, float *data); image copy_image(image p); diff --git a/src/imagenet.c b/src/imagenet.c index dece9528..4c4d2bd6 100644 --- a/src/imagenet.c +++ b/src/imagenet.c @@ -21,7 +21,7 @@ void train_imagenet(char *cfgfile, char *weightfile) printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); int imgs = 1024; char **labels = get_labels("data/inet.labels.list"); - list *plist = get_paths("/data/imagenet/cls.train.list"); + list *plist = get_paths("data/inet.train.list"); char **paths = (char **)list_to_array(plist); printf("%d\n", plist->size); int N = plist->size; @@ -62,6 +62,11 @@ void train_imagenet(char *cfgfile, char *weightfile) sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); save_weights(net, buff); } + if(*net.seen%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } } char buff[256]; sprintf(buff, "%s/%s.weights", backup_directory, base); diff --git a/src/layer.h b/src/layer.h index 1b120096..d8af6e40 100644 --- a/src/layer.h +++ b/src/layer.h @@ -20,7 +20,8 @@ typedef enum { NORMALIZATION, AVGPOOL, LOCAL, - SHORTCUT + SHORTCUT, + ACTIVE } LAYER_TYPE; typedef enum{ @@ -46,8 +47,6 @@ struct layer{ int side; int stride; int pad; - int crop_width; - int crop_height; int sqrt; int flip; int index; diff --git a/src/local_layer.c b/src/local_layer.c index c0f52cbc..9a5750fc 100644 --- a/src/local_layer.c +++ b/src/local_layer.c @@ -55,7 +55,7 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in // float scale = 1./sqrt(size*size*c); float scale = sqrt(2./(size*size*c)); - for(i = 0; i < c*n*size*size; ++i) l.filters[i] = 2*scale*rand_uniform() - scale; + for(i = 0; i < c*n*size*size; ++i) l.filters[i] = scale*rand_uniform(-1,1); l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float)); l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 20176277..7af49546 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -51,6 +51,7 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h) int stride = l->stride; l->h = h; l->w = w; + l->inputs = h*w*l->c; l->out_w = (w-1)/stride + 1; l->out_h = (h-1)/stride + 1; diff --git a/src/network.c b/src/network.c index 8dee8cce..79579f10 100644 --- a/src/network.c +++ b/src/network.c @@ -10,6 +10,7 @@ #include "connected_layer.h" #include "local_layer.h" #include "convolutional_layer.h" +#include "activation_layer.h" #include "deconvolutional_layer.h" #include "detection_layer.h" #include "normalization_layer.h" @@ -73,6 +74,8 @@ char *get_layer_string(LAYER_TYPE a) switch(a){ case CONVOLUTIONAL: return "convolutional"; + case ACTIVE: + return "activation"; case LOCAL: return "local"; case DECONVOLUTIONAL: @@ -131,6 +134,8 @@ void forward_network(network net, network_state state) forward_convolutional_layer(l, state); } else if(l.type == DECONVOLUTIONAL){ forward_deconvolutional_layer(l, state); + } else if(l.type == ACTIVE){ + forward_activation_layer(l, state); } else if(l.type == LOCAL){ forward_local_layer(l, state); } else if(l.type == NORMALIZATION){ @@ -231,6 +236,8 @@ void backward_network(network net, network_state state) backward_convolutional_layer(l, state); } else if(l.type == DECONVOLUTIONAL){ backward_deconvolutional_layer(l, state); + } else if(l.type == ACTIVE){ + backward_activation_layer(l, state); } else if(l.type == NORMALIZATION){ backward_normalization_layer(l, state); } else if(l.type == MAXPOOL){ @@ -360,11 +367,12 @@ int resize_network(network *net, int w, int h) layer l = net->layers[i]; if(l.type == CONVOLUTIONAL){ resize_convolutional_layer(&l, w, h); + }else if(l.type == CROP){ + resize_crop_layer(&l, w, h); }else if(l.type == MAXPOOL){ resize_maxpool_layer(&l, w, h); }else if(l.type == AVGPOOL){ resize_avgpool_layer(&l, w, h); - break; }else if(l.type == NORMALIZATION){ resize_normalization_layer(&l, w, h); }else if(l.type == COST){ @@ -376,6 +384,7 @@ int resize_network(network *net, int w, int h) net->layers[i] = l; w = l.out_w; h = l.out_h; + if(l.type == AVGPOOL) break; } //fprintf(stderr, " Done!\n"); return 0; diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 0b506477..a83293da 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -18,6 +18,7 @@ extern "C" { #include "connected_layer.h" #include "detection_layer.h" #include "convolutional_layer.h" +#include "activation_layer.h" #include "deconvolutional_layer.h" #include "maxpool_layer.h" #include "avgpool_layer.h" @@ -48,6 +49,8 @@ void forward_network_gpu(network net, network_state state) forward_convolutional_layer_gpu(l, state); } else if(l.type == DECONVOLUTIONAL){ forward_deconvolutional_layer_gpu(l, state); + } else if(l.type == ACTIVE){ + forward_activation_layer_gpu(l, state); } else if(l.type == LOCAL){ forward_local_layer_gpu(l, state); } else if(l.type == DETECTION){ @@ -97,6 +100,8 @@ void backward_network_gpu(network net, network_state state) backward_convolutional_layer_gpu(l, state); } else if(l.type == DECONVOLUTIONAL){ backward_deconvolutional_layer_gpu(l, state); + } else if(l.type == ACTIVE){ + backward_activation_layer_gpu(l, state); } else if(l.type == LOCAL){ backward_local_layer_gpu(l, state); } else if(l.type == MAXPOOL){ diff --git a/src/nightmare.c b/src/nightmare.c index 1a78dd5a..ccc75255 100644 --- a/src/nightmare.c +++ b/src/nightmare.c @@ -108,6 +108,69 @@ void optimize_picture(network *net, image orig, int max_layer, float scale, floa } +void smooth(image recon, image update, float lambda, int num) +{ + int i, j, k; + int ii, jj; + for(k = 0; k < recon.c; ++k){ + for(j = 0; j < recon.h; ++j){ + for(i = 0; i < recon.w; ++i){ + int out_index = i + recon.w*(j + recon.h*k); + for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){ + if (jj < 0) continue; + for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){ + if (ii < 0) continue; + int in_index = ii + recon.w*(jj + recon.h*k); + update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]); + } + } + } + } + } +} + +void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size) +{ + scale_image(recon, 2); + translate_image(recon, -1); + + image delta = make_image(recon.w, recon.h, recon.c); + + network_state state = {0}; +#ifdef GPU + state.input = cuda_make_array(recon.data, recon.w*recon.h*recon.c); + state.delta = cuda_make_array(delta.data, delta.w*delta.h*delta.c); + state.truth = cuda_make_array(features, get_network_output_size(net)); + + forward_network_gpu(net, state); + backward_network_gpu(net, state); + + cuda_pull_array(state.delta, delta.data, delta.w*delta.h*delta.c); + + cuda_free(state.input); + cuda_free(state.delta); + cuda_free(state.truth); +#else + state.input = recon.data; + state.delta = delta.data; + state.truth = features; + + forward_network(net, state); + backward_network(net, state); +#endif + + axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1); + smooth(recon, update, lambda, smooth_size); + + axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1); + scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1); + + translate_image(recon, 1); + scale_image(recon, .5); + constrain_image(recon); + free_image(delta); +} + void run_nightmare(int argc, char **argv) { @@ -131,7 +194,11 @@ void run_nightmare(int argc, char **argv) float rate = find_float_arg(argc, argv, "-rate", .04); float thresh = find_float_arg(argc, argv, "-thresh", 1.); float rotate = find_float_arg(argc, argv, "-rotate", 0); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); network net = parse_network_cfg(cfg); load_weights(&net, weights); @@ -151,17 +218,38 @@ void run_nightmare(int argc, char **argv) im = resized; } + float *features; + image update; + if (reconstruct){ + resize_network(&net, im.w, im.h); + int size = get_network_output_size(net); + features = calloc(size, sizeof(float)); + float *out = network_predict(net, im.data); + copy_cpu(size, out, 1, features, 1); + free_image(im); + im = make_random_image(im.w, im.h, im.c); + update = make_image(im.w, im.h, im.c); + } + int e; int n; for(e = 0; e < rounds; ++e){ - fprintf(stderr, "Iteration: "); - fflush(stderr); + fprintf(stderr, "Iteration: "); + fflush(stderr); for(n = 0; n < iters; ++n){ fprintf(stderr, "%d, ", n); fflush(stderr); - int layer = max_layer + rand()%range - range/2; - int octave = rand()%octaves; - optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size); + show_image(im, "reconstruction"); + #ifdef OPENCV + cvWaitKey(10); + #endif + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } } fprintf(stderr, "done\n"); if(0){ diff --git a/src/parser.c b/src/parser.c index 8efafad8..218fd27a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -7,6 +7,7 @@ #include "crop_layer.h" #include "cost_layer.h" #include "convolutional_layer.h" +#include "activation_layer.h" #include "normalization_layer.h" #include "deconvolutional_layer.h" #include "connected_layer.h" @@ -29,6 +30,7 @@ typedef struct{ int is_network(section *s); int is_convolutional(section *s); +int is_activation(section *s); int is_local(section *s); int is_deconvolutional(section *s); int is_connected(section *s); @@ -301,10 +303,31 @@ layer parse_shortcut(list *options, size_params params, network net) layer from = net.layers[index]; layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + s.activation = activation; return s; } +layer parse_activation(list *options, size_params params) +{ + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + + layer l = make_activation_layer(params.batch, params.inputs, activation); + + l.out_h = params.h; + l.out_w = params.w; + l.out_c = params.c; + l.h = params.h; + l.w = params.w; + l.c = params.c; + + return l; +} + route_layer parse_route(list *options, size_params params, network net) { char *l = option_find(options, "layers"); @@ -447,6 +470,8 @@ network parse_network_cfg(char *filename) l = parse_convolutional(options, params); }else if(is_local(s)){ l = parse_local(options, params); + }else if(is_activation(s)){ + l = parse_activation(options, params); }else if(is_deconvolutional(s)){ l = parse_deconvolutional(options, params); }else if(is_connected(s)){ @@ -530,6 +555,10 @@ int is_convolutional(section *s) return (strcmp(s->type, "[conv]")==0 || strcmp(s->type, "[convolutional]")==0); } +int is_activation(section *s) +{ + return (strcmp(s->type, "[activation]")==0); +} int is_network(section *s) { return (strcmp(s->type, "[net]")==0 diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index ff1d50f0..bf455162 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -10,22 +10,15 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int layer l = {0}; l.type = SHORTCUT; l.batch = batch; - l.w = w; - l.h = h; - l.c = c; + l.w = w2; + l.h = h2; + l.c = c2; l.out_w = w; l.out_h = h; l.out_c = c; l.outputs = w*h*c; - l.inputs = w*h*c; - int stride = w2 / w; + l.inputs = l.outputs; - assert(stride * w == w2); - assert(stride * h == h2); - assert(c >= c2); - - l.stride = stride; - l.n = c2; l.index = index; l.delta = calloc(l.outputs*batch, sizeof(float)); @@ -40,25 +33,29 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int void forward_shortcut_layer(const layer l, network_state state) { copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); - shortcut_cpu(l.output, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output, l.stride, l.n); + shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output); + activate_array(l.output, l.outputs*l.batch, l.activation); } void backward_shortcut_layer(const layer l, network_state state) { - copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1); - shortcut_cpu(state.net.layers[l.index].delta, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta, 1, l.c); + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1); + shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); } #ifdef GPU void forward_shortcut_layer_gpu(const layer l, network_state state) { copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); - shortcut_gpu(l.output_gpu, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output_gpu, l.stride, l.n); + shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); + activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); } void backward_shortcut_layer_gpu(const layer l, network_state state) { - copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); - shortcut_gpu(state.net.layers[l.index].delta_gpu, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta_gpu, 1, l.c); + gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1); + shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu); } #endif diff --git a/src/utils.c b/src/utils.c index 3ad09329..d49d0ce9 100644 --- a/src/utils.c +++ b/src/utils.c @@ -485,9 +485,9 @@ float rand_normal() } */ -float rand_uniform() +float rand_uniform(float min, float max) { - return (float)rand()/RAND_MAX; + return ((float)rand()/RAND_MAX * (max - min)) + min; } float **one_hot_encode(float *a, int n, int k) diff --git a/src/utils.h b/src/utils.h index 7e13e86f..96bd6cfe 100644 --- a/src/utils.h +++ b/src/utils.h @@ -34,7 +34,7 @@ int max_index(float *a, int n); float constrain(float min, float max, float a); float mse_array(float *a, int n); float rand_normal(); -float rand_uniform(); +float rand_uniform(float min, float max); float sum_array(float *a, int n); float mean_array(float *a, int n); void mean_arrays(float **a, int n, int els, float *avg); diff --git a/src/yolo.c b/src/yolo.c index a6c1e784..6bd4e6b2 100644 --- a/src/yolo.c +++ b/src/yolo.c @@ -14,7 +14,7 @@ image voc_labels[20]; void train_yolo(char *cfgfile, char *weightfile) { - char *train_images = "data/voc.0712.trainval"; + char *train_images = "/data/voc/train.txt"; char *backup_directory = "/home/pjreddie/backup/"; srand(time(0)); data_seed = time(0);