diff --git a/Makefile b/Makefile index f5524b90..3d3d5e43 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,14 @@ CUDNN=0 OPENCV=0 DEBUG=0 -ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52 +ARCH= -gencode arch=compute_20,code=[sm_20,sm_21] \ + -gencode arch=compute_30,code=sm_30 \ + -gencode arch=compute_35,code=sm_35 \ + -gencode arch=compute_50,code=[sm_50,compute_50] \ + -gencode arch=compute_52,code=[sm_52,compute_52] + +# This is what I use, uncomment if you know your arch and want to specify +# ARCH= -gencode arch=compute_52,code=compute_52 VPATH=./src/ EXEC=darknet diff --git a/cfg/jnet19.cfg b/cfg/jnet19.cfg new file mode 100644 index 00000000..bf73fb7b --- /dev/null +++ b/cfg/jnet19.cfg @@ -0,0 +1,194 @@ +[net] +batch=128 +subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=448 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/cfg/jnet19_448.cfg b/cfg/jnet19_448.cfg new file mode 100644 index 00000000..133c688e --- /dev/null +++ b/cfg/jnet19_448.cfg @@ -0,0 +1,200 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c index 55bd3a8b..b53548bd 100644 --- a/src/batchnorm_layer.c +++ b/src/batchnorm_layer.c @@ -130,10 +130,10 @@ void forward_batchnorm_layer(layer l, network_state state) mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean); variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance); - scal_cpu(l.out_c, .99, l.rolling_mean, 1); - axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1); - scal_cpu(l.out_c, .99, l.rolling_variance, 1); - axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1); + scal_cpu(l.out_c, .9, l.rolling_mean, 1); + axpy_cpu(l.out_c, .1, l.mean, 1, l.rolling_mean, 1); + scal_cpu(l.out_c, .9, l.rolling_variance, 1); + axpy_cpu(l.out_c, .1, l.variance, 1, l.rolling_variance, 1); copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w); diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 709f77fb..ae9df8f0 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -133,6 +133,9 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state if(l.batch_normalize){ backward_batchnorm_layer_gpu(l, state); + //axpy_ongpu(l.outputs*l.batch, -state.net.decay, l.x_gpu, 1, l.delta_gpu, 1); + } else { + //axpy_ongpu(l.outputs*l.batch, -state.net.decay, l.output_gpu, 1, l.delta_gpu, 1); } float *original_input = state.input; diff --git a/src/parser.c b/src/parser.c index 84733d7b..87a64919 100644 --- a/src/parser.c +++ b/src/parser.c @@ -966,23 +966,28 @@ void load_convolutional_weights(layer l, FILE *fp) //return; } int num = l.n*l.c*l.size*l.size; - if(0){ - fread(l.biases + ((l.n != 1374)?0:5), sizeof(float), l.n, fp); - if (l.batch_normalize && (!l.dontloadscales)){ - fread(l.scales + ((l.n != 1374)?0:5), sizeof(float), l.n, fp); - fread(l.rolling_mean + ((l.n != 1374)?0:5), sizeof(float), l.n, fp); - fread(l.rolling_variance + ((l.n != 1374)?0:5), sizeof(float), l.n, fp); + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); } - fread(l.weights + ((l.n != 1374)?0:5*l.c*l.size*l.size), sizeof(float), num, fp); - }else{ - fread(l.biases, sizeof(float), l.n, fp); - if (l.batch_normalize && (!l.dontloadscales)){ - fread(l.scales, sizeof(float), l.n, fp); - fread(l.rolling_mean, sizeof(float), l.n, fp); - fread(l.rolling_variance, sizeof(float), l.n, fp); + if(0){ + fill_cpu(l.n, 0, l.rolling_mean, 1); + fill_cpu(l.n, 0, l.rolling_variance, 1); } - fread(l.weights, sizeof(float), num, fp); } + fread(l.weights, sizeof(float), num, fp); if(l.adam){ fread(l.m, sizeof(float), num, fp); fread(l.v, sizeof(float), num, fp);