diff --git a/Makefile b/Makefile index d5c75e0f..91164081 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ CFLAGS+= -DGPU LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand endif -OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o +OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o ifeq ($(GPU), 1) OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o coco_kernels.o endif diff --git a/cfg/msr_152.cfg b/cfg/msr_152.cfg new file mode 100644 index 00000000..5d5a3b25 --- /dev/null +++ b/cfg/msr_152.cfg @@ -0,0 +1,1408 @@ +[net] +batch=256 +subdivisions=16 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.02 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +saturation=1 +exposure=1 +angle=0 + +##### Conv 1 ##### + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=3 +stride=2 + + +##### Conv 2_x ##### + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + + +##### Conv 3_x ##### + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + + +##### Conv 4_x ##### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + + +##### Conv 5_x ##### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/cfg/msr_34.cfg b/cfg/msr_34.cfg new file mode 100644 index 00000000..e561d45c --- /dev/null +++ b/cfg/msr_34.cfg @@ -0,0 +1,371 @@ +[net] +batch=128 +subdivisions=1 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=500000 + +#policy=sigmoid +#gamma=.00008 +#step=100000 +#max_batches=200000 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +saturation=1 +exposure=1 +angle=0 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=3 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + + + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + + + + + + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + + + + + + + + + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -3 + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/cfg/msr_50.cfg b/cfg/msr_50.cfg new file mode 100644 index 00000000..31685f21 --- /dev/null +++ b/cfg/msr_50.cfg @@ -0,0 +1,490 @@ +[net] +batch=128 +subdivisions=4 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +saturation=1 +exposure=1 +angle=0 + +##### Conv 1 ##### + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=3 +stride=2 + + +##### Conv 2_x ##### + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + + +##### Conv 3_x ##### + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + + +##### Conv 4_x ##### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + + +##### Conv 5_x ##### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from = -4 + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/src/avgpool_layer.h b/src/avgpool_layer.h index 634581a2..f8329aea 100644 --- a/src/avgpool_layer.h +++ b/src/avgpool_layer.h @@ -2,9 +2,9 @@ #define AVGPOOL_LAYER_H #include "image.h" -#include "params.h" #include "cuda.h" #include "layer.h" +#include "network.h" typedef layer avgpool_layer; diff --git a/src/blas.c b/src/blas.c index 37859376..941109ef 100644 --- a/src/blas.c +++ b/src/blas.c @@ -1,6 +1,22 @@ #include "blas.h" #include "math.h" +void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2) +{ + int i,j,k,b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c && k < c2; ++k){ + for(j = 0; j < h/sample; ++j){ + for(i = 0; i < w/sample; ++i){ + int out_index = i*sample + w*(j*sample + h*(k + c*b)); + int add_index = b*w*stride/sample*h*stride/sample*c2 + i*stride + w*stride/sample*(j*stride + h*stride/sample*k); + out[out_index] += add[add_index]; + } + } + } + } +} + void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) { float scale = 1./(batch * spatial); diff --git a/src/blas.h b/src/blas.h index 5a50db5b..023024a4 100644 --- a/src/blas.h +++ b/src/blas.h @@ -15,6 +15,7 @@ void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); void scal_cpu(int N, float ALPHA, float *X, int INCX); float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); void test_gpu_blas(); +void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2); void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); @@ -43,5 +44,6 @@ void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *varianc void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2); #endif #endif diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 17955e46..8f05eb94 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -228,6 +228,7 @@ __global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY) if(i < N) Y[i*INCY] *= X[i*INCX]; } + extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) { size_t N = batch*filters*spatial; @@ -372,3 +373,27 @@ extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX) fill_kernel<<>>(N, ALPHA, X, INCX); check_error(cudaPeekAtLastError()); } + +__global__ void shortcut_kernel(int size, float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2, int min_c) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + int i = id % (w/sample); + id /= (w/sample); + int j = id % (h/sample); + id /= (h/sample); + int k = id % min_c; + id /= min_c; + int b = id; + int out_index = i*sample + w*(j*sample + h*(k + c*b)); + int add_index = b*w*stride/sample*h*stride/sample*c2 + i*stride + w*stride/sample*(j*stride + h*stride/sample*k); + out[out_index] += add[add_index]; +} + +extern "C" void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2) +{ + int min_c = (c < c2) ? c : c2; + int size = batch * w/sample * h/sample * min_c; + shortcut_kernel<<>>(size, out, w, h, c, batch, sample, add, stride, c2, min_c); + check_error(cudaPeekAtLastError()); +} diff --git a/src/classifier.c b/src/classifier.c index c0006e67..8a3ae5af 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -131,7 +131,7 @@ void validate_classifier(char *datacfg, char *filename, char *weightfile) char *label_list = option_find_str(options, "labels", "data/labels.list"); char *valid_list = option_find_str(options, "valid", "data/train.list"); int classes = option_find_int(options, "classes", 2); - int topk = option_find_int(options, "topk", 1); + int topk = option_find_int(options, "top", 1); char **labels = get_labels(label_list); list *plist = get_paths(valid_list); @@ -194,11 +194,12 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi list *options = read_data_cfg(datacfg); - char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); int top = option_find_int(options, "top", 1); int i = 0; - char **names = get_labels(label_list); + char **names = get_labels(name_list); clock_t time; int indexes[10]; char buff[256]; diff --git a/src/connected_layer.c b/src/connected_layer.c index 640e8b8b..2d83dd91 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -25,13 +25,13 @@ connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVAT l.weight_updates = calloc(inputs*outputs, sizeof(float)); l.bias_updates = calloc(outputs, sizeof(float)); - l.weights = calloc(inputs*outputs, sizeof(float)); + l.weights = calloc(outputs*inputs, sizeof(float)); l.biases = calloc(outputs, sizeof(float)); //float scale = 1./sqrt(inputs); float scale = sqrt(2./inputs); - for(i = 0; i < inputs*outputs; ++i){ + for(i = 0; i < outputs*inputs; ++i){ l.weights[i] = 2*scale*rand_uniform() - scale; } @@ -40,10 +40,10 @@ connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVAT } #ifdef GPU - l.weights_gpu = cuda_make_array(l.weights, inputs*outputs); + l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); l.biases_gpu = cuda_make_array(l.biases, outputs); - l.weight_updates_gpu = cuda_make_array(l.weight_updates, inputs*outputs); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs); l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs); l.output_gpu = cuda_make_array(l.output, outputs*batch); @@ -76,7 +76,7 @@ void forward_connected_layer(connected_layer l, network_state state) float *a = state.input; float *b = l.weights; float *c = l.output; - gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); activate_array(l.output, l.outputs*l.batch, l.activation); } @@ -87,11 +87,11 @@ void backward_connected_layer(connected_layer l, network_state state) for(i = 0; i < l.batch; ++i){ axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); } - int m = l.inputs; + int m = l.outputs; int k = l.batch; - int n = l.outputs; - float *a = state.input; - float *b = l.delta; + int n = l.inputs; + float *a = l.delta; + float *b = state.input; float *c = l.weight_updates; gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); @@ -103,7 +103,7 @@ void backward_connected_layer(connected_layer l, network_state state) b = l.weights; c = state.delta; - if(c) gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); } #ifdef GPU @@ -146,7 +146,7 @@ void forward_connected_layer_gpu(connected_layer l, network_state state) float * a = state.input; float * b = l.weights_gpu; float * c = l.output_gpu; - gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n); activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); /* @@ -163,11 +163,11 @@ void backward_connected_layer_gpu(connected_layer l, network_state state) for(i = 0; i < l.batch; ++i){ axpy_ongpu_offset(l.outputs, 1, l.delta_gpu, i*l.outputs, 1, l.bias_updates_gpu, 0, 1); } - int m = l.inputs; + int m = l.outputs; int k = l.batch; - int n = l.outputs; - float * a = state.input; - float * b = l.delta_gpu; + int n = l.inputs; + float * a = l.delta_gpu; + float * b = state.input; float * c = l.weight_updates_gpu; gemm_ongpu(1,0,m,n,k,1,a,m,b,n,1,c,n); @@ -179,6 +179,6 @@ void backward_connected_layer_gpu(connected_layer l, network_state state) b = l.weights_gpu; c = state.delta; - if(c) gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(c) gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n); } #endif diff --git a/src/connected_layer.h b/src/connected_layer.h index cea5a023..2bf53b26 100644 --- a/src/connected_layer.h +++ b/src/connected_layer.h @@ -2,8 +2,8 @@ #define CONNECTED_LAYER_H #include "activations.h" -#include "params.h" #include "layer.h" +#include "network.h" typedef layer connected_layer; diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index 70a3d052..436ed7e9 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -6,6 +6,7 @@ #include "image.h" #include "activations.h" #include "layer.h" +#include "network.h" typedef layer convolutional_layer; diff --git a/src/cost_layer.h b/src/cost_layer.h index aa4af2f4..a692831e 100644 --- a/src/cost_layer.h +++ b/src/cost_layer.h @@ -1,7 +1,7 @@ #ifndef COST_LAYER_H #define COST_LAYER_H -#include "params.h" #include "layer.h" +#include "network.h" typedef layer cost_layer; diff --git a/src/crop_layer.h b/src/crop_layer.h index 81641862..b4093510 100644 --- a/src/crop_layer.h +++ b/src/crop_layer.h @@ -4,6 +4,7 @@ #include "image.h" #include "params.h" #include "layer.h" +#include "network.h" typedef layer crop_layer; diff --git a/src/cuda.c b/src/cuda.c index e95feff5..d773d0b9 100644 --- a/src/cuda.c +++ b/src/cuda.c @@ -67,6 +67,7 @@ float *cuda_make_array(float *x, int n) status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); check_error(status); } + if(!x_gpu) error("Cuda malloc failed\n"); return x_gpu; } diff --git a/src/data.c b/src/data.c index 9b84c5a8..8d762765 100644 --- a/src/data.c +++ b/src/data.c @@ -149,6 +149,43 @@ void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float } } +void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char *labelpath = find_replace(path, "images", "labels"); + labelpath = find_replace(labelpath, "JPEGImages", "labels"); + + labelpath = find_replace(labelpath, ".jpg", ".txt"); + labelpath = find_replace(labelpath, ".JPG", ".txt"); + labelpath = find_replace(labelpath, ".JPEG", ".txt"); + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count && i < 30; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .0 || h < .0) continue; + + int index = (4+classes) * i; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + + if (id < classes) truth[index+id] = 1; + } + free(boxes); +} + void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) { char *labelpath = find_replace(path, "images", "labels"); @@ -482,6 +519,59 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h) return d; } +data load_data_swag(char **paths, int n, int classes, float jitter) +{ + int index = rand_r(&data_seed)%n; + char *random_path = paths[index]; + + image orig = load_image_color(random_path, 0, 0); + int h = orig.h; + int w = orig.w; + + data d; + d.shallow = 0; + d.w = w; + d.h = h; + + d.X.rows = 1; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + int k = (4+classes)*30; + d.y = make_matrix(1, k); + + int dw = w*jitter; + int dh = h*jitter; + + int pleft = (rand_uniform() * 2*dw - dw); + int pright = (rand_uniform() * 2*dw - dw); + int ptop = (rand_uniform() * 2*dh - dh); + int pbot = (rand_uniform() * 2*dh - dh); + + int swidth = w - pleft - pright; + int sheight = h - ptop - pbot; + + float sx = (float)swidth / w; + float sy = (float)sheight / h; + + int flip = rand_r(&data_seed)%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/w)/sx; + float dy = ((float)ptop /h)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + d.X.vals[0] = sized.data; + + fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + free_image(cropped); + + return d; +} + data load_data_detection(int n, char **paths, int m, int classes, int w, int h, int num_boxes, int background) { char **random_paths = get_random_paths(paths, n, m); @@ -559,6 +649,8 @@ void *load_thread(void *ptr) *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); } else if (a.type == REGION_DATA){ *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter); + } else if (a.type == SWAG_DATA){ + *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); } else if (a.type == COMPARE_DATA){ *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); } else if (a.type == IMAGE_DATA){ diff --git a/src/data.h b/src/data.h index 0dacea28..0ebdfc3f 100644 --- a/src/data.h +++ b/src/data.h @@ -20,13 +20,14 @@ static inline float distance_from_edge(int x, int max) } typedef struct{ + int w, h; matrix X; matrix y; int shallow; } data; typedef enum { - CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA + CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA } data_type; typedef struct load_args{ diff --git a/src/deconvolutional_layer.h b/src/deconvolutional_layer.h index 74498c77..b6af3978 100644 --- a/src/deconvolutional_layer.h +++ b/src/deconvolutional_layer.h @@ -6,6 +6,7 @@ #include "image.h" #include "activations.h" #include "layer.h" +#include "network.h" typedef layer deconvolutional_layer; diff --git a/src/detection_layer.c b/src/detection_layer.c index 33f4f0b3..ca32bc03 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -146,7 +146,7 @@ void forward_detection_layer(const detection_layer l, network_state state) } float iou = box_iou(out, truth); - //printf("%d", best_index); + //printf("%d,", best_index); int p_index = index + locations*l.classes + i*l.n + best_index; *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); diff --git a/src/detection_layer.h b/src/detection_layer.h index 5e34ac7a..75d9b7f0 100644 --- a/src/detection_layer.h +++ b/src/detection_layer.h @@ -1,8 +1,8 @@ #ifndef REGION_LAYER_H #define REGION_LAYER_H -#include "params.h" #include "layer.h" +#include "network.h" typedef layer detection_layer; diff --git a/src/dropout_layer.h b/src/dropout_layer.h index b1dc883b..0c2ce4d8 100644 --- a/src/dropout_layer.h +++ b/src/dropout_layer.h @@ -3,6 +3,7 @@ #include "params.h" #include "layer.h" +#include "network.h" typedef layer dropout_layer; diff --git a/src/image.c b/src/image.c index a8a6684c..51871b55 100644 --- a/src/image.c +++ b/src/image.c @@ -1,5 +1,6 @@ #include "image.h" #include "utils.h" +#include "blas.h" #include #include @@ -708,8 +709,14 @@ void test_resize(char *filename) image exp5 = copy_image(im); exposure_image(exp5, .5); + image r = resize_image(im, im.w/2, im.h/2); + + image black = make_image(im.w, im.h, im.c); + shortcut_cpu(black.data, im.w, im.h, im.c, 1, 2, r.data, 1, r.c); + show_image(im, "Original"); show_image(gray, "Gray"); + show_image(black, "Black"); show_image(sat2, "Saturation-2"); show_image(sat5, "Saturation-.5"); show_image(exp2, "Exposure-2"); diff --git a/src/layer.h b/src/layer.h index b3ab6271..1b120096 100644 --- a/src/layer.h +++ b/src/layer.h @@ -3,6 +3,9 @@ #include "activations.h" +struct layer; +typedef struct layer layer; + typedef enum { CONVOLUTIONAL, DECONVOLUTIONAL, @@ -16,20 +19,22 @@ typedef enum { COST, NORMALIZATION, AVGPOOL, - LOCAL + LOCAL, + SHORTCUT } LAYER_TYPE; typedef enum{ SSE, MASKED } COST_TYPE; -typedef struct { +struct layer{ LAYER_TYPE type; ACTIVATION activation; COST_TYPE cost_type; int batch_normalize; int batch; int forced; + int flipped; int inputs; int outputs; int truths; @@ -45,6 +50,7 @@ typedef struct { int crop_height; int sqrt; int flip; + int index; float angle; float jitter; float saturation; @@ -144,7 +150,7 @@ typedef struct { float * squared_gpu; float * norms_gpu; #endif -} layer; +}; void free_layer(layer); diff --git a/src/local_layer.h b/src/local_layer.h index 675a5fbc..28915d81 100644 --- a/src/local_layer.h +++ b/src/local_layer.h @@ -2,10 +2,10 @@ #define LOCAL_LAYER_H #include "cuda.h" -#include "params.h" #include "image.h" #include "activations.h" #include "layer.h" +#include "network.h" typedef layer local_layer; diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index ab13874b..b91c2c18 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -5,6 +5,7 @@ #include "params.h" #include "cuda.h" #include "layer.h" +#include "network.h" typedef layer maxpool_layer; diff --git a/src/network.c b/src/network.c index d9585c47..8dee8cce 100644 --- a/src/network.c +++ b/src/network.c @@ -19,6 +19,7 @@ #include "softmax_layer.h" #include "dropout_layer.h" #include "route_layer.h" +#include "shortcut_layer.h" int get_current_batch(network net) { @@ -94,6 +95,8 @@ char *get_layer_string(LAYER_TYPE a) return "cost"; case ROUTE: return "route"; + case SHORTCUT: + return "shortcut"; case NORMALIZATION: return "normalization"; default: @@ -119,6 +122,7 @@ void forward_network(network net, network_state state) { int i; for(i = 0; i < net.n; ++i){ + state.index = i; layer l = net.layers[i]; if(l.delta){ scal_cpu(l.outputs * l.batch, 0, l.delta, 1); @@ -149,6 +153,8 @@ void forward_network(network net, network_state state) forward_dropout_layer(l, state); } else if(l.type == ROUTE){ forward_route_layer(l, net); + } else if(l.type == SHORTCUT){ + forward_shortcut_layer(l, state); } state.input = l.output; } @@ -211,6 +217,7 @@ void backward_network(network net, network_state state) float *original_input = state.input; float *original_delta = state.delta; for(i = net.n-1; i >= 0; --i){ + state.index = i; if(i == 0){ state.input = original_input; state.delta = original_delta; @@ -244,6 +251,8 @@ void backward_network(network net, network_state state) backward_cost_layer(l, state); } else if(l.type == ROUTE){ backward_route_layer(l, net); + } else if(l.type == SHORTCUT){ + backward_shortcut_layer(l, state); } } } @@ -255,6 +264,8 @@ float train_network_datum(network net, float *x, float *y) if(gpu_index >= 0) return train_network_datum_gpu(net, x, y); #endif network_state state; + state.index = 0; + state.net = net; state.input = x; state.delta = 0; state.truth = y; @@ -307,6 +318,8 @@ float train_network_batch(network net, data d, int n) { int i,j; network_state state; + state.index = 0; + state.net = net; state.train = 1; state.delta = 0; float sum = 0; @@ -443,6 +456,8 @@ float *network_predict(network net, float *input) #endif network_state state; + state.net = net; + state.index = 0; state.input = input; state.truth = 0; state.train = 0; diff --git a/src/network.h b/src/network.h index 428ff52c..4c108df0 100644 --- a/src/network.h +++ b/src/network.h @@ -3,15 +3,15 @@ #define NETWORK_H #include "image.h" -#include "detection_layer.h" #include "layer.h" #include "data.h" +#include "params.h" typedef enum { CONSTANT, STEP, EXP, POLY, STEPS, SIG } learning_rate_policy; -typedef struct { +typedef struct network{ int n; int batch; int *seen; @@ -43,6 +43,15 @@ typedef struct { #endif } network; +typedef struct network_state { + float *truth; + float *input; + float *delta; + int train; + int index; + network net; +} network_state; + #ifdef GPU float train_network_datum_gpu(network net, float *x, float *y); float *network_predict_gpu(network net, float *input); diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 26b84041..0b506477 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -27,6 +27,7 @@ extern "C" { #include "softmax_layer.h" #include "dropout_layer.h" #include "route_layer.h" +#include "shortcut_layer.h" #include "blas.h" } @@ -38,6 +39,7 @@ void forward_network_gpu(network net, network_state state) { int i; for(i = 0; i < net.n; ++i){ + state.index = i; layer l = net.layers[i]; if(l.delta_gpu){ fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1); @@ -68,6 +70,8 @@ void forward_network_gpu(network net, network_state state) forward_dropout_layer_gpu(l, state); } else if(l.type == ROUTE){ forward_route_layer_gpu(l, net); + } else if(l.type == SHORTCUT){ + forward_shortcut_layer_gpu(l, state); } state.input = l.output_gpu; } @@ -79,6 +83,7 @@ void backward_network_gpu(network net, network_state state) float * original_input = state.input; float * original_delta = state.delta; for(i = net.n-1; i >= 0; --i){ + state.index = i; layer l = net.layers[i]; if(i == 0){ state.input = original_input; @@ -112,6 +117,8 @@ void backward_network_gpu(network net, network_state state) backward_cost_layer_gpu(l, state); } else if(l.type == ROUTE){ backward_route_layer_gpu(l, net); + } else if(l.type == SHORTCUT){ + backward_shortcut_layer_gpu(l, state); } } } @@ -138,6 +145,8 @@ void update_network_gpu(network net) float train_network_datum_gpu(network net, float *x, float *y) { network_state state; + state.index = 0; + state.net = net; int x_size = get_network_input_size(net)*net.batch; int y_size = get_network_output_size(net)*net.batch; if(net.layers[net.n-1].type == DETECTION) y_size = net.layers[net.n-1].truths*net.batch; @@ -178,6 +187,8 @@ float *network_predict_gpu(network net, float *input) { int size = get_network_input_size(net) * net.batch; network_state state; + state.index = 0; + state.net = net; state.input = cuda_make_array(input, size); state.truth = 0; state.train = 0; diff --git a/src/normalization_layer.h b/src/normalization_layer.h index c90780e8..ab327764 100644 --- a/src/normalization_layer.h +++ b/src/normalization_layer.h @@ -3,7 +3,7 @@ #include "image.h" #include "layer.h" -#include "params.h" +#include "network.h" layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); void resize_normalization_layer(layer *layer, int h, int w); diff --git a/src/params.h b/src/params.h index 7343a079..8b137891 100644 --- a/src/params.h +++ b/src/params.h @@ -1,12 +1 @@ -#ifndef PARAMS_H -#define PARAMS_H - -typedef struct { - float *truth; - float *input; - float *delta; - int train; -} network_state; - -#endif diff --git a/src/parser.c b/src/parser.c index 277c6e2f..8efafad8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -17,6 +17,7 @@ #include "avgpool_layer.h" #include "local_layer.h" #include "route_layer.h" +#include "shortcut_layer.h" #include "list.h" #include "option_list.h" #include "utils.h" @@ -37,6 +38,7 @@ int is_dropout(section *s); int is_softmax(section *s); int is_normalization(section *s); int is_crop(section *s); +int is_shortcut(section *s); int is_cost(section *s); int is_detection(section *s); int is_route(section *s); @@ -80,6 +82,7 @@ typedef struct size_params{ int h; int w; int c; + int index; } size_params; deconvolutional_layer parse_deconvolutional(list *options, size_params params) @@ -148,6 +151,7 @@ convolutional_layer parse_convolutional(list *options, size_params params) int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation, batch_normalize); + layer.flipped = option_find_int_quiet(options, "flipped", 0); char *weights = option_find_str(options, "weights", 0); char *biases = option_find_str(options, "biases", 0); @@ -287,6 +291,20 @@ layer parse_normalization(list *options, size_params params) return l; } +layer parse_shortcut(list *options, size_params params, network net) +{ + char *l = option_find(options, "from"); + int index = atoi(l); + if(index < 0) index = params.index + index; + + int batch = params.batch; + layer from = net.layers[index]; + + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + return s; +} + + route_layer parse_route(list *options, size_params params, network net) { char *l = option_find(options, "layers"); @@ -303,13 +321,14 @@ route_layer parse_route(list *options, size_params params, network net) for(i = 0; i < n; ++i){ int index = atoi(l); l = strchr(l, ',')+1; + if(index < 0) index = params.index + index; layers[i] = index; sizes[i] = net.layers[index].outputs; } int batch = params.batch; route_layer layer = make_route_layer(batch, n, layers, sizes); - + convolutional_layer first = net.layers[layers[0]]; layer.out_w = first.out_w; layer.out_h = first.out_h; @@ -419,6 +438,7 @@ network parse_network_cfg(char *filename) int count = 0; free_section(s); while(n){ + params.index = count; fprintf(stderr, "%d: ", count); s = (section *)n->val; options = s->options; @@ -447,6 +467,8 @@ network parse_network_cfg(char *filename) l = parse_avgpool(options, params); }else if(is_route(s)){ l = parse_route(options, params, net); + }else if(is_shortcut(s)){ + l = parse_shortcut(options, params, net); }else if(is_dropout(s)){ l = parse_dropout(options, params); l.output = net.layers[count-1].output; @@ -464,13 +486,13 @@ network parse_network_cfg(char *filename) net.layers[count] = l; free_section(s); n = n->next; + ++count; if(n){ params.h = l.out_h; params.w = l.out_w; params.c = l.out_c; params.inputs = l.outputs; } - ++count; } free_list(sections); net.outputs = get_network_output_size(net); @@ -478,6 +500,10 @@ network parse_network_cfg(char *filename) return net; } +int is_shortcut(section *s) +{ + return (strcmp(s->type, "[shortcut]")==0); +} int is_crop(section *s) { return (strcmp(s->type, "[crop]")==0); @@ -625,9 +651,12 @@ void save_weights_upto(network net, char *filename, int cutoff) FILE *fp = fopen(filename, "w"); if(!fp) file_error(filename); - fwrite(&net.learning_rate, sizeof(float), 1, fp); - fwrite(&net.momentum, sizeof(float), 1, fp); - fwrite(&net.decay, sizeof(float), 1, fp); + int major = 0; + int minor = 1; + int revision = 0; + fwrite(&major, sizeof(int), 1, fp); + fwrite(&minor, sizeof(int), 1, fp); + fwrite(&revision, sizeof(int), 1, fp); fwrite(net.seen, sizeof(int), 1, fp); int i; @@ -674,6 +703,19 @@ void save_weights(network net, char *filename) save_weights_upto(net, filename, net.n); } +void transpose_matrix(float *a, int rows, int cols) +{ + float *transpose = calloc(rows*cols, sizeof(float)); + int x, y; + for(x = 0; x < rows; ++x){ + for(y = 0; y < cols; ++y){ + transpose[y*rows + x] = a[x*cols + y]; + } + } + memcpy(a, transpose, rows*cols*sizeof(float)); + free(transpose); +} + void load_weights_upto(network *net, char *filename, int cutoff) { fprintf(stderr, "Loading weights from %s...", filename); @@ -681,10 +723,12 @@ void load_weights_upto(network *net, char *filename, int cutoff) FILE *fp = fopen(filename, "r"); if(!fp) file_error(filename); - float garbage; - fread(&garbage, sizeof(float), 1, fp); - fread(&garbage, sizeof(float), 1, fp); - fread(&garbage, sizeof(float), 1, fp); + int major; + int minor; + int revision; + fread(&major, sizeof(int), 1, fp); + fread(&minor, sizeof(int), 1, fp); + fread(&revision, sizeof(int), 1, fp); fread(net->seen, sizeof(int), 1, fp); int i; @@ -700,6 +744,9 @@ void load_weights_upto(network *net, char *filename, int cutoff) fread(l.rolling_variance, sizeof(float), l.n, fp); } fread(l.filters, sizeof(float), num, fp); + if (l.flipped) { + transpose_matrix(l.filters, l.c*l.size*l.size, l.n); + } #ifdef GPU if(gpu_index >= 0){ push_convolutional_layer(l); @@ -719,6 +766,9 @@ void load_weights_upto(network *net, char *filename, int cutoff) if(l.type == CONNECTED){ fread(l.biases, sizeof(float), l.outputs, fp); fread(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if(major > 1000 || minor > 1000){ + transpose_matrix(l.weights, l.inputs, l.outputs); + } #ifdef GPU if(gpu_index >= 0){ push_connected_layer(l); diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c new file mode 100644 index 00000000..ff1d50f0 --- /dev/null +++ b/src/shortcut_layer.c @@ -0,0 +1,64 @@ +#include "shortcut_layer.h" +#include "cuda.h" +#include "blas.h" +#include +#include + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +{ + fprintf(stderr,"Shortcut Layer: %d\n", index); + layer l = {0}; + l.type = SHORTCUT; + l.batch = batch; + l.w = w; + l.h = h; + l.c = c; + l.out_w = w; + l.out_h = h; + l.out_c = c; + l.outputs = w*h*c; + l.inputs = w*h*c; + int stride = w2 / w; + + assert(stride * w == w2); + assert(stride * h == h2); + assert(c >= c2); + + l.stride = stride; + l.n = c2; + l.index = index; + + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + #ifdef GPU + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + return l; +} + +void forward_shortcut_layer(const layer l, network_state state) +{ + copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); + shortcut_cpu(l.output, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output, l.stride, l.n); +} + +void backward_shortcut_layer(const layer l, network_state state) +{ + copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1); + shortcut_cpu(state.net.layers[l.index].delta, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta, 1, l.c); +} + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network_state state) +{ + copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); + shortcut_gpu(l.output_gpu, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output_gpu, l.stride, l.n); +} + +void backward_shortcut_layer_gpu(const layer l, network_state state) +{ + copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); + shortcut_gpu(state.net.layers[l.index].delta_gpu, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta_gpu, 1, l.c); +} +#endif diff --git a/src/shortcut_layer.h b/src/shortcut_layer.h new file mode 100644 index 00000000..c09a8097 --- /dev/null +++ b/src/shortcut_layer.h @@ -0,0 +1,16 @@ +#ifndef SHORTCUT_LAYER_H +#define SHORTCUT_LAYER_H + +#include "layer.h" +#include "network.h" + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +void forward_shortcut_layer(const layer l, network_state state); +void backward_shortcut_layer(const layer l, network_state state); + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network_state state); +void backward_shortcut_layer_gpu(const layer l, network_state state); +#endif + +#endif diff --git a/src/softmax_layer.h b/src/softmax_layer.h index f29c6521..9cbcd699 100644 --- a/src/softmax_layer.h +++ b/src/softmax_layer.h @@ -2,6 +2,7 @@ #define SOFTMAX_LAYER_H #include "params.h" #include "layer.h" +#include "network.h" typedef layer softmax_layer; diff --git a/src/yolo.c b/src/yolo.c index d62c533f..a6c1e784 100644 --- a/src/yolo.c +++ b/src/yolo.c @@ -255,9 +255,8 @@ void validate_yolo_recall(char *cfgfile, char *weightfile) int i=0; float thresh = .001; - int nms = 0; float iou_thresh = .5; - float nms_thresh = .5; + float nms = 0; int total = 0; int correct = 0; @@ -271,7 +270,7 @@ void validate_yolo_recall(char *cfgfile, char *weightfile) char *id = basecfg(path); float *predictions = network_predict(net, sized.data); convert_yolo_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1); - if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh); + if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms); char *labelpath = find_replace(path, "images", "labels"); labelpath = find_replace(labelpath, "JPEGImages", "labels");