From bd689f91ac13c9f5fcfb74b9409e972f011b3a65 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 13 Jun 2019 14:43:23 +0300 Subject: [PATCH] Added TridentNet (state-of-art on MS COCO) --- build/darknet/x64/cfg/resnet152_trident.cfg | 2177 +++++++++++++++++++ build/darknet/x64/partial.cmd | 2 + cfg/resnet152_trident.cfg | 2177 +++++++++++++++++++ include/darknet.h | 1 + src/coco.c | 4 + src/conv_lstm_layer.c | 22 +- src/convolutional_layer.c | 119 +- src/convolutional_layer.h | 2 +- src/crnn_layer.c | 6 +- src/layer.c | 52 +- src/parser.c | 15 +- 11 files changed, 4493 insertions(+), 84 deletions(-) create mode 100644 build/darknet/x64/cfg/resnet152_trident.cfg create mode 100644 cfg/resnet152_trident.cfg diff --git a/build/darknet/x64/cfg/resnet152_trident.cfg b/build/darknet/x64/cfg/resnet152_trident.cfg new file mode 100644 index 00000000..c7e0e255 --- /dev/null +++ b/build/darknet/x64/cfg/resnet152_trident.cfg @@ -0,0 +1,2177 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=64 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + + +learning_rate=0.001 +burn_in=1000 +max_batches = 10000 + +policy=sgdr +sgdr_cycle=1000 +sgdr_mult=2 +steps=4000,6000,8000,9000 +#scales=1, 1, 0.1, 0.1 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +### TridentNet - large objects - Start + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 
+filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +## Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=2048 +activation=leaky + 
+[convolutional] +size=1 +stride=1 +pad=1 +filters=24 +activation=linear + +[yolo] +mask = 8,9,10,11 +anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 59,119, 80,80, 116,90, 156,198, 373,326 +classes=1 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + +### TridentNet - large objects - End + + + + + + + +### TridentNet - medium objects - Start + +[route] +layers = 165 + +[convolutional] +share_index=166 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=167 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=168 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=170 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=171 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=172 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=174 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=175 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=176 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=178 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=179 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=180 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=182 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=183 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=184 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=186 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=187 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=188 +dilation=2 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +## Conv 5 +[convolutional] +share_index=190 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=191 +dilation=2 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +share_index=192 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=194 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=195 +dilation=2 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=196 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=198 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=199 +dilation=2 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=200 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 
+filters=1024 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 49 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=24 +activation=linear + +[yolo] +mask = 4,5,6,7 +anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 64,64, 59,119, 116,90, 156,198, 373,326 +classes=1 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + +### TridentNet - medium objects - End + + + + + + + + + + + +### TridentNet - small objects - Start + +[route] +layers = 165 + +[convolutional] +share_index=166 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=167 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=168 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=170 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=171 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=172 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=174 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=175 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=176 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=178 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=179 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +share_index=180 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=182 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=183 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=184 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=186 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=187 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=188 +dilation=1 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +## Conv 5 +[convolutional] +share_index=190 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=191 +dilation=1 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +share_index=192 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=194 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=195 +dilation=1 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=196 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=198 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=199 +dilation=1 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=200 
+batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=leaky + +[upsample] +stride=4 + +[route] +layers = -1, 17 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=24 +activation=linear + +[yolo] +mask = 0,1,2,3 +anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 64,64, 59,119, 116,90, 156,198, 373,326 +classes=1 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + +### TridentNet - small objects - End + diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd index 03759e8a..5aa972f3 100644 --- a/build/darknet/x64/partial.cmd +++ b/build/darknet/x64/partial.cmd @@ -35,6 +35,8 @@ darknet.exe partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 darknet.exe partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.14 14 +darknet.exe partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.13 13 + darknet.exe partial cfg/yolo9000.cfg yolo9000.weights yolo9000.conv.22 22 diff --git a/cfg/resnet152_trident.cfg b/cfg/resnet152_trident.cfg new file mode 100644 index 00000000..c7e0e255 --- /dev/null +++ b/cfg/resnet152_trident.cfg @@ -0,0 +1,2177 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=64 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + + +learning_rate=0.001 +burn_in=1000 +max_batches = 10000 + +policy=sgdr +sgdr_cycle=1000 +sgdr_mult=2 +steps=4000,6000,8000,9000 +#scales=1, 1, 0.1, 0.1 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 
+activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +### TridentNet - large objects - Start + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +## Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +dilation=3 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=2048 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=24 +activation=linear + +[yolo] +mask = 8,9,10,11 +anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 59,119, 80,80, 116,90, 156,198, 373,326 +classes=1 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + +### TridentNet - large objects - End + + + + + + + +### TridentNet - medium objects - Start + +[route] +layers = 165 + +[convolutional] +share_index=166 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=167 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=168 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=170 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=171 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=172 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=174 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=175 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=176 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + +[convolutional] +share_index=178 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=179 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=180 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=182 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=183 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=184 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=186 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=187 +dilation=2 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=188 +dilation=2 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +## Conv 5 +[convolutional] +share_index=190 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=191 +dilation=2 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +share_index=192 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=194 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=195 +dilation=2 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=196 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +share_index=198 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=199 +dilation=2 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=200 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 49 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=24 +activation=linear + +[yolo] +mask = 4,5,6,7 +anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 64,64, 59,119, 116,90, 156,198, 373,326 +classes=1 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + +### TridentNet - medium objects - End + + + + + + + + + + + +### TridentNet - small objects - Start + +[route] +layers = 165 + +[convolutional] +share_index=166 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=167 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=168 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=170 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=171 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=172 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=174 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=175 +dilation=1 +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=176 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=178 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=179 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=180 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=182 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=183 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=184 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=186 +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=187 +dilation=1 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=188 +dilation=1 +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +## Conv 5 +[convolutional] +share_index=190 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=191 +dilation=1 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +share_index=192 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=194 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=195 +dilation=1 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +share_index=196 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +share_index=198 +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=199 +dilation=1 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +share_index=200 +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=leaky + +[upsample] +stride=4 + +[route] +layers = -1, 17 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=24 +activation=linear + +[yolo] +mask = 0,1,2,3 +anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 64,64, 59,119, 116,90, 156,198, 373,326 +classes=1 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + +### TridentNet - small objects - End + diff --git a/include/darknet.h b/include/darknet.h index 4154d664..e807ff9b 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -185,6 +185,7 @@ struct layer { void(*forward_gpu) (struct layer, struct network_state); void(*backward_gpu) (struct layer, struct network_state); void(*update_gpu) (struct layer, int, float, float, float); + layer *share_layer; int batch_normalize; int shortcut; int batch; diff --git a/src/coco.c b/src/coco.c index c1535a35..cdfd3dff 100644 --- a/src/coco.c +++ b/src/coco.c @@ -218,7 +218,11 @@ void validate_coco(char *cfgfile, char *weightfile) free_image(val_resized[t]); } } +#ifdef WIN32 + fseek(fp, -3, SEEK_CUR); +#else fseek(fp, -2, SEEK_CUR); +#endif fprintf(fp, "\n]\n"); fclose(fp); diff --git a/src/conv_lstm_layer.c b/src/conv_lstm_layer.c index 5caa3754..6cbaf1c3 100644 --- a/src/conv_lstm_layer.c +++ b/src/conv_lstm_layer.c 
@@ -66,44 +66,44 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // U l.uf = (layer*)calloc(1, sizeof(layer)); - *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.uf->batch = batch; if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; l.ui = (layer*)calloc(1, sizeof(layer)); - *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.ui->batch = batch; if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; l.ug = (layer*)calloc(1, sizeof(layer)); - *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.ug->batch = batch; if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size; l.uo = (layer*)calloc(1, sizeof(layer)); - *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.uo->batch = batch; if (l.workspace_size < l.uo->workspace_size) l.workspace_size = 
l.uo->workspace_size; // W l.wf = (layer*)calloc(1, sizeof(layer)); - *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.wf->batch = batch; if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; l.wi = (layer*)calloc(1, sizeof(layer)); - *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.wi->batch = batch; if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; l.wg = (layer*)calloc(1, sizeof(layer)); - *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.wg->batch = batch; if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; l.wo = (layer*)calloc(1, sizeof(layer)); - *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.wo->batch = batch; if (l.workspace_size < l.wo->workspace_size) 
l.workspace_size = l.wo->workspace_size; @@ -111,21 +111,21 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // V l.vf = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.vf->batch = batch; if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size; } l.vi = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.vi->batch = batch; if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size; } l.vo = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.vo->batch = batch; if (l.workspace_size < l.vo->workspace_size) l.workspace_size = l.vo->workspace_size; } diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 84bc75fb..de77a585 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -274,10 +274,11 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) 
CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1)); CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w)); + //printf("\n l->dilation = %d, l->pad = %d, l->size = %d \n", l->dilation, l->pad, l->size); #if(CUDNN_MAJOR >= 6) - CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT)); // cudnn >= 6.0 + CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad * l->dilation, l->pad* l->dilation, l->stride, l->stride, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT)); // cudnn >= 6.0 #else - CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION)); // cudnn 5.1 + CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad * l->dilation, l->pad * l->dilation, l->stride, l->stride, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION)); // cudnn 5.1 #endif int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST; int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST; @@ -331,7 +332,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) #endif #endif -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index) +convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer) { int total_batch = batch*steps; int i; @@ -341,6 +342,7 @@ convolutional_layer 
make_convolutional_layer(int batch, int steps, int h, int w, if (xnor) groups = 1; // disable groups for XNOR-net if (groups < 1) groups = 1; + l.share_layer = share_layer; l.index = index; l.h = h; l.w = w; @@ -360,11 +362,20 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.learning_rate_scale = 1; l.nweights = (c / groups) * n * size * size; - l.weights = (float*)calloc(l.nweights, sizeof(float)); - l.weight_updates = (float*)calloc(l.nweights, sizeof(float)); + if (l.share_layer) { + l.weights = l.share_layer->weights; + l.weight_updates = l.share_layer->weight_updates; - l.biases = (float*)calloc(n, sizeof(float)); - l.bias_updates = (float*)calloc(n, sizeof(float)); + l.biases = l.share_layer->biases; + l.bias_updates = l.share_layer->bias_updates; + } + else { + l.weights = (float*)calloc(l.nweights, sizeof(float)); + l.weight_updates = (float*)calloc(l.nweights, sizeof(float)); + + l.biases = (float*)calloc(n, sizeof(float)); + l.bias_updates = (float*)calloc(n, sizeof(float)); + } // float scale = 1./sqrt(size*size*c); float scale = sqrt(2./(size*size*c/groups)); @@ -411,20 +422,33 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, } if(batch_normalize){ - l.scales = (float*)calloc(n, sizeof(float)); - l.scale_updates = (float*)calloc(n, sizeof(float)); - for(i = 0; i < n; ++i){ - l.scales[i] = 1; + if (l.share_layer) { + l.scales = l.share_layer->scales; + l.scale_updates = l.share_layer->scale_updates; + l.mean = l.share_layer->mean; + l.variance = l.share_layer->variance; + l.mean_delta = l.share_layer->mean_delta; + l.variance_delta = l.share_layer->variance_delta; + l.rolling_mean = l.share_layer->rolling_mean; + l.rolling_variance = l.share_layer->rolling_variance; + } + else { + l.scales = (float*)calloc(n, sizeof(float)); + l.scale_updates = (float*)calloc(n, sizeof(float)); + for (i = 0; i < n; ++i) { + l.scales[i] = 1; + } + + l.mean = (float*)calloc(n, sizeof(float)); + 
l.variance = (float*)calloc(n, sizeof(float)); + + l.mean_delta = (float*)calloc(n, sizeof(float)); + l.variance_delta = (float*)calloc(n, sizeof(float)); + + l.rolling_mean = (float*)calloc(n, sizeof(float)); + l.rolling_variance = (float*)calloc(n, sizeof(float)); } - l.mean = (float*)calloc(n, sizeof(float)); - l.variance = (float*)calloc(n, sizeof(float)); - - l.mean_delta = (float*)calloc(n, sizeof(float)); - l.variance_delta = (float*)calloc(n, sizeof(float)); - - l.rolling_mean = (float*)calloc(n, sizeof(float)); - l.rolling_variance = (float*)calloc(n, sizeof(float)); l.x = (float*)calloc(total_batch * l.outputs, sizeof(float)); l.x_norm = (float*)calloc(total_batch * l.outputs, sizeof(float)); } @@ -452,16 +476,24 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.scale_m_gpu = cuda_make_array(l.scale_m, n); l.scale_v_gpu = cuda_make_array(l.scale_v, n); } - - l.weights_gpu = cuda_make_array(l.weights, l.nweights); - l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + if (l.share_layer) { + l.weights_gpu = l.share_layer->weights_gpu; + l.weight_updates_gpu = l.share_layer->weight_updates_gpu; + l.weights_gpu16 = l.share_layer->weights_gpu16; + l.weight_updates_gpu16 = l.share_layer->weight_updates_gpu16; + l.biases_gpu = l.share_layer->biases_gpu; + l.bias_updates_gpu = l.share_layer->bias_updates_gpu; + } + else { + l.weights_gpu = cuda_make_array(l.weights, l.nweights); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); #ifdef CUDNN_HALF - l.weights_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); - l.weight_updates_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); + l.weights_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); + l.weight_updates_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); #endif - - l.biases_gpu = cuda_make_array(l.biases, n); - l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = 
cuda_make_array(l.bias_updates, n); + } l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n); @@ -476,18 +508,29 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, } if(batch_normalize){ - l.scales_gpu = cuda_make_array(l.scales, n); - l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); + if (l.share_layer) { + l.scales_gpu = l.share_layer->scales_gpu; + l.scale_updates_gpu = l.share_layer->scale_updates_gpu; + l.mean_gpu = l.share_layer->mean_gpu; + l.variance_gpu = l.share_layer->variance_gpu; + l.rolling_mean_gpu = l.share_layer->rolling_mean_gpu; + l.rolling_variance_gpu = l.share_layer->rolling_variance_gpu; + l.mean_delta_gpu = l.share_layer->mean_delta_gpu; + l.variance_delta_gpu = l.share_layer->variance_delta_gpu; + } + else { + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); - l.mean_gpu = cuda_make_array(l.mean, n); - l.variance_gpu = cuda_make_array(l.variance, n); + l.mean_gpu = cuda_make_array(l.mean, n); + l.variance_gpu = cuda_make_array(l.variance, n); - l.rolling_mean_gpu = cuda_make_array(l.mean, n); - l.rolling_variance_gpu = cuda_make_array(l.variance, n); - - l.mean_delta_gpu = cuda_make_array(l.mean, n); - l.variance_delta_gpu = cuda_make_array(l.variance, n); + l.rolling_mean_gpu = cuda_make_array(l.mean, n); + l.rolling_variance_gpu = cuda_make_array(l.variance, n); + l.mean_delta_gpu = cuda_make_array(l.mean, n); + l.variance_delta_gpu = cuda_make_array(l.variance, n); + } l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); } @@ -504,7 +547,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, if (l.xnor && l.use_bin_output) fprintf(stderr, "convXB"); else if (l.xnor) fprintf(stderr, "convX "); else fprintf(stderr, "conv "); - fprintf(stderr, "%5d %2d 
x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + fprintf(stderr, "%5d %2d x%2d /%2d(%d)%4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, size, size, stride, dilation, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); return l; } @@ -526,7 +569,7 @@ void denormalize_convolutional_layer(convolutional_layer l) void test_convolutional_layer() { - convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0); + convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, NULL); l.batch_normalize = 1; float data[] = {1,1,1,1,1, 1,1,1,1,1, diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index 586bd2cd..e937b4c5 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -30,7 +30,7 @@ void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); #endif size_t get_convolutional_workspace_size(layer l); -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index); +convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer); void denormalize_convolutional_layer(convolutional_layer l); void resize_convolutional_layer(convolutional_layer *layer, int w, int h); void forward_convolutional_layer(const convolutional_layer layer, network_state state); diff --git a/src/crnn_layer.c b/src/crnn_layer.c index d7c75b50..7609003b 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -50,17 +50,17 @@ layer make_crnn_layer(int batch, int h, int 
w, int c, int hidden_filters, int ou l.state = (float*)calloc(l.hidden * l.batch * (l.steps + 1), sizeof(float)); l.input_layer = (layer*)calloc(1, sizeof(layer)); - *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.input_layer->batch = batch; if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; l.self_layer = (layer*)calloc(1, sizeof(layer)); - *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.self_layer->batch = batch; if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size; l.output_layer = (layer*)calloc(1, sizeof(layer)); - *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0); + *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.output_layer->batch = batch; if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; diff --git a/src/layer.c b/src/layer.c index 6409bd89..ade58f4e 100644 --- a/src/layer.c +++ b/src/layer.c @@ -66,12 +66,12 @@ void free_layer(layer l) if (l.concat) free(l.concat); if (l.concat_delta) free(l.concat_delta); if 
(l.binary_weights) free(l.binary_weights); - if (l.biases) free(l.biases); - if (l.bias_updates) free(l.bias_updates); - if (l.scales) free(l.scales); - if (l.scale_updates) free(l.scale_updates); - if (l.weights) free(l.weights); - if (l.weight_updates) free(l.weight_updates); + if (l.biases) free(l.biases), l.biases = NULL; + if (l.bias_updates) free(l.bias_updates), l.bias_updates = NULL; + if (l.scales) free(l.scales), l.scales = NULL; + if (l.scale_updates) free(l.scale_updates), l.scale_updates = NULL; + if (l.weights) free(l.weights), l.weights = NULL; + if (l.weight_updates) free(l.weight_updates), l.weight_updates = NULL; if (l.align_bit_weights) free(l.align_bit_weights); if (l.mean_arr) free(l.mean_arr); #ifdef GPU @@ -89,12 +89,12 @@ void free_layer(layer l) if (l.squared) free(l.squared); if (l.norms) free(l.norms); if (l.spatial_mean) free(l.spatial_mean); - if (l.mean) free(l.mean); - if (l.variance) free(l.variance); - if (l.mean_delta) free(l.mean_delta); - if (l.variance_delta) free(l.variance_delta); - if (l.rolling_mean) free(l.rolling_mean); - if (l.rolling_variance) free(l.rolling_variance); + if (l.mean) free(l.mean), l.mean = NULL; + if (l.variance) free(l.variance), l.variance = NULL; + if (l.mean_delta) free(l.mean_delta), l.mean_delta = NULL; + if (l.variance_delta) free(l.variance_delta), l.variance_delta = NULL; + if (l.rolling_mean) free(l.rolling_mean), l.rolling_mean = NULL; + if (l.rolling_variance) free(l.rolling_variance), l.rolling_variance = NULL; if (l.x) free(l.x); if (l.x_norm) free(l.x_norm); if (l.m) free(l.m); @@ -143,12 +143,12 @@ void free_layer(layer l) if (l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); if (l.binary_input_gpu) cuda_free(l.binary_input_gpu); if (l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); - if (l.mean_gpu) cuda_free(l.mean_gpu); - if (l.variance_gpu) cuda_free(l.variance_gpu); - if (l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); - if (l.rolling_variance_gpu) 
cuda_free(l.rolling_variance_gpu); - if (l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); - if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); + if (l.mean_gpu) cuda_free(l.mean_gpu), l.mean_gpu = NULL; + if (l.variance_gpu) cuda_free(l.variance_gpu), l.variance_gpu = NULL; + if (l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu), l.rolling_mean_gpu = NULL; + if (l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu), l.rolling_variance_gpu = NULL; + if (l.variance_delta_gpu) cuda_free(l.variance_delta_gpu), l.variance_delta_gpu = NULL; + if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu), l.mean_delta_gpu = NULL; if (l.x_gpu) cuda_free(l.x_gpu); // dont free if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); @@ -157,14 +157,14 @@ void free_layer(layer l) if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu); if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu); - if (l.weights_gpu) cuda_free(l.weights_gpu); - if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); - if (l.weights_gpu16) cuda_free(l.weights_gpu16); - if (l.weight_updates_gpu16) cuda_free(l.weight_updates_gpu16); - if (l.biases_gpu) cuda_free(l.biases_gpu); - if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); - if (l.scales_gpu) cuda_free(l.scales_gpu); - if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); + if (l.weights_gpu) cuda_free(l.weights_gpu), l.weights_gpu = NULL; + if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu), l.weight_updates_gpu = NULL; + if (l.weights_gpu16) cuda_free(l.weights_gpu16), l.weights_gpu16 = NULL; + if (l.weight_updates_gpu16) cuda_free(l.weight_updates_gpu16), l.weight_updates_gpu16 = NULL; + if (l.biases_gpu) cuda_free(l.biases_gpu), l.biases_gpu = NULL; + if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu), l.bias_updates_gpu = NULL; + if (l.scales_gpu) cuda_free(l.scales_gpu), l.scales_gpu = NULL; + if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu), l.scale_updates_gpu = NULL; if (l.output_gpu) 
cuda_free(l.output_gpu); if (l.delta_gpu) cuda_free(l.delta_gpu); if (l.rand_gpu) cuda_free(l.rand_gpu); diff --git a/src/parser.c b/src/parser.c index 0cb906a5..2c095132 100644 --- a/src/parser.c +++ b/src/parser.c @@ -147,13 +147,14 @@ local_layer parse_local(list *options, size_params params) return layer; } -convolutional_layer parse_convolutional(list *options, size_params params) +convolutional_layer parse_convolutional(list *options, size_params params, network net) { int n = option_find_int(options, "filters",1); int groups = option_find_int_quiet(options, "groups", 1); int size = option_find_int(options, "size",1); int stride = option_find_int(options, "stride",1); int dilation = option_find_int_quiet(options, "dilation", 1); + if (size == 1) dilation = 1; int pad = option_find_int_quiet(options, "pad",0); int padding = option_find_int_quiet(options, "padding",0); if(pad) padding = size/2; @@ -161,6 +162,10 @@ convolutional_layer parse_convolutional(list *options, size_params params) char *activation_s = option_find_str(options, "activation", "logistic"); ACTIVATION activation = get_activation(activation_s); + int share_index = option_find_int_quiet(options, "share_index", -1); + convolutional_layer *share_layer = NULL; + if(share_index > -1) share_layer = &net.layers[share_index]; + int batch,h,w,c; h = params.h; w = params.w; @@ -172,7 +177,7 @@ convolutional_layer parse_convolutional(list *options, size_params params) int xnor = option_find_int_quiet(options, "xnor", 0); int use_bin_output = option_find_int_quiet(options, "bin_output", 0); - convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index); + convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, share_layer);
layer.flipped = option_find_int_quiet(options, "flipped", 0); layer.dot = option_find_float_quiet(options, "dot", 0); @@ -838,7 +843,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) layer l = { (LAYER_TYPE)0 }; LAYER_TYPE lt = string_to_layer_type(s->type); if(lt == CONVOLUTIONAL){ - l = parse_convolutional(options, params); + l = parse_convolutional(options, params, net); }else if(lt == LOCAL){ l = parse_local(options, params); }else if(lt == ACTIVE){ @@ -1112,7 +1117,7 @@ void save_weights_upto(network net, char *filename, int cutoff) int i; for(i = 0; i < net.n && i < cutoff; ++i){ layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ + if(l.type == CONVOLUTIONAL && l.share_layer == NULL){ save_convolutional_weights(l, fp); } if(l.type == CONNECTED){ save_connected_weights(l, fp); @@ -1337,7 +1342,7 @@ void load_weights_upto(network *net, char *filename, int cutoff) for(i = 0; i < net->n && i < cutoff; ++i){ layer l = net->layers[i]; if (l.dontload) continue; - if(l.type == CONVOLUTIONAL){ + if(l.type == CONVOLUTIONAL && l.share_layer == NULL){ load_convolutional_weights(l, fp); } if(l.type == CONNECTED){