diff --git a/build/darknet/x64/cfg/yolov3_5l.cfg b/build/darknet/x64/cfg/yolov3_5l.cfg new file mode 100644 index 00000000..fec157e0 --- /dev/null +++ b/build/darknet/x64/cfg/yolov3_5l.cfg @@ -0,0 +1,968 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 12,13,14 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 9,10,11 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +############### + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 11 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 4 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=64 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=64 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=64 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/cfg/yolov3_5l.cfg b/cfg/yolov3_5l.cfg new file mode 100644 index 00000000..fec157e0 --- /dev/null +++ b/cfg/yolov3_5l.cfg @@ -0,0 +1,968 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 12,13,14 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 9,10,11 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +############### + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 11 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 4 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=64 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=64 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=64 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=15 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/src/detector.c b/src/detector.c index 5a970add..fa550fe5 100644 --- a/src/detector.c +++ b/src/detector.c @@ -964,7 +964,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa return mean_average_precision; } -#ifdef OPENCV +//#ifdef OPENCV typedef struct { float w, h; } anchors_t; @@ -979,6 +979,17 @@ int anchors_comparator(const void *pa, const void *pb) return 0; } +int anchors_data_comparator(const float **pa, const float **pb) +{ + float *a = (float *)*pa; + float *b = (float *)*pb; + float diff = b[0]*b[1] - a[0]*a[1]; + if (diff < 0) return 1; + else if (diff > 0) return -1; + return 0; +} + + void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) { printf("\n num_of_clusters = %d, width = %d, height = %d \n", num_of_clusters, width, height); @@ -991,12 +1002,14 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int //float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 }; float *rel_width_height_array = calloc(1000, sizeof(float)); + list *options = read_data_cfg(datacfg); char *train_images = option_find_str(options, "train", "data/train.list"); list *plist = get_paths(train_images); int number_of_images = plist->size; char **paths = (char **)list_to_array(plist); + srand(time(0)); int number_of_boxes = 0; printf(" read labels from %d images \n", number_of_images); @@ -1030,85 +1043,57 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int } } printf("\n all loaded. \n"); + printf("\n calculating k-means++ ..."); - CvMat* points = cvCreateMat(number_of_boxes, 2, CV_32FC1); - CvMat* centers = cvCreateMat(num_of_clusters, 2, CV_32FC1); - CvMat* labels = cvCreateMat(number_of_boxes, 1, CV_32SC1); + matrix boxes_data; + model anchors_data; + boxes_data = make_matrix(number_of_boxes, 2); + printf("\n"); for (i = 0; i < number_of_boxes; ++i) { - points->data.fl[i * 2] = rel_width_height_array[i * 2]; - points->data.fl[i * 2 + 1] = rel_width_height_array[i * 2 + 1]; - //cvSet1D(points, i * 2, cvScalar(rel_width_height_array[i * 2], 0, 0, 0)); - //cvSet1D(points, i * 2 + 1, cvScalar(rel_width_height_array[i * 2 + 1], 0, 0, 0)); + float w = boxes_data.vals[i][0] = rel_width_height_array[i * 2]; + float h = boxes_data.vals[i][1] = rel_width_height_array[i * 2 + 1]; + //if (w > 410 || h > 410) printf("i:%d, w = %f, h = %f \n", i, w, h); } + // Is used: distance(box, centroid) = 1 - IoU(box, centroid) - const int attemps = 10; - double compactness; + // K-means + anchors_data = do_kmeans(boxes_data, num_of_clusters); - enum { - KMEANS_RANDOM_CENTERS = 0, - KMEANS_USE_INITIAL_LABELS = 1, - KMEANS_PP_CENTERS = 2 - }; - - printf("\n calculating k-means++ ..."); - // Should be used: distance(box, centroid) = 1 - IoU(box, centroid) - cvKMeans2(points, num_of_clusters, labels, - cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10000, 0), attemps, - 0, KMEANS_PP_CENTERS, - centers, &compactness); - - // sort anchors - qsort(centers->data.fl, num_of_clusters, 2*sizeof(float), anchors_comparator); - - //orig 2.0 anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 - //float orig_anch[] = { 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 }; - // worse than ours (even for 19x19 final size - for input size 608x608) - - //orig anchors = 1.3221,1.73145, 3.19275,4.00944, 5.05587,8.09892, 9.47112,4.84053, 11.2364,10.0071 - //float orig_anch[] = { 1.3221,1.73145, 3.19275,4.00944, 5.05587,8.09892, 9.47112,4.84053, 11.2364,10.0071 }; - // orig (IoU=59.90%) better than ours (59.75%) + qsort(anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), anchors_data_comparator); //gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 //float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 }; - // ours: anchors = 9.3813,6.0095, 3.3999,5.3505, 10.9476,11.1992, 5.0161,9.8314, 1.5003,2.1595 - //float orig_anch[] = { 9.3813,6.0095, 3.3999,5.3505, 10.9476,11.1992, 5.0161,9.8314, 1.5003,2.1595 }; - //for (i = 0; i < num_of_clusters * 2; ++i) centers->data.fl[i] = orig_anch[i]; - - //for (i = 0; i < number_of_boxes; ++i) - // printf("%2.2f,%2.2f, ", points->data.fl[i * 2], points->data.fl[i * 2 + 1]); - printf("\n"); float avg_iou = 0; for (i = 0; i < number_of_boxes; ++i) { - float box_w = points->data.fl[i * 2]; - float box_h = points->data.fl[i * 2 + 1]; + float box_w = rel_width_height_array[i * 2]; //points->data.fl[i * 2]; + float box_h = rel_width_height_array[i * 2 + 1]; //points->data.fl[i * 2 + 1]; //int cluster_idx = labels->data.i[i]; int cluster_idx = 0; float min_dist = FLT_MAX; + float best_iou = 0; for (j = 0; j < num_of_clusters; ++j) { - float anchor_w = centers->data.fl[j * 2]; - float anchor_h = centers->data.fl[j * 2 + 1]; - float w_diff = anchor_w - box_w; - float h_diff = anchor_h - box_h; - float distance = sqrt(w_diff*w_diff + h_diff*h_diff); - if (distance < min_dist) min_dist = distance, cluster_idx = j; + float anchor_w = anchors_data.centers.vals[j][0]; // centers->data.fl[j * 2]; + float anchor_h = anchors_data.centers.vals[j][1]; // centers->data.fl[j * 2 + 1]; + float min_w = (box_w < anchor_w) ? box_w : anchor_w; + float min_h = (box_h < anchor_h) ? box_h : anchor_h; + float box_intersect = min_w*min_h; + float box_union = box_w*box_h + anchor_w*anchor_h - box_intersect; + float iou = box_intersect / box_union; + float distance = 1 - iou; + if (distance < min_dist) min_dist = distance, cluster_idx = j, best_iou = iou; } - float anchor_w = centers->data.fl[cluster_idx * 2]; - float anchor_h = centers->data.fl[cluster_idx * 2 + 1]; - float min_w = (box_w < anchor_w) ? box_w : anchor_w; - float min_h = (box_h < anchor_h) ? box_h : anchor_h; - float box_intersect = min_w*min_h; - float box_union = box_w*box_h + anchor_w*anchor_h - box_intersect; - float iou = box_intersect / box_union; - if (iou > 1 || iou < 0) { // || box_w > width || box_h > height) { + float anchor_w = anchors_data.centers.vals[cluster_idx][0]; //centers->data.fl[cluster_idx * 2]; + float anchor_h = anchors_data.centers.vals[cluster_idx][1]; //centers->data.fl[cluster_idx * 2 + 1]; + if (best_iou > 1 || best_iou < 0) { // || box_w > width || box_h > height) { printf(" Wrong label: i = %d, box_w = %d, box_h = %d, anchor_w = %d, anchor_h = %d, iou = %f \n", - i, box_w, box_h, anchor_w, anchor_h, iou); + i, box_w, box_h, anchor_w, anchor_h, best_iou); } - else avg_iou += iou; + else avg_iou += best_iou; } avg_iou = 100 * avg_iou / number_of_boxes; printf("\n avg IoU = %2.2f %% \n", avg_iou); @@ -1119,7 +1104,10 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int printf("\nSaving anchors to the file: anchors.txt \n"); printf("anchors = "); for (i = 0; i < num_of_clusters; ++i) { - sprintf(buff, "%2.4f,%2.4f", centers->data.fl[i * 2], centers->data.fl[i * 2 + 1]); + float anchor_w = anchors_data.centers.vals[i][0]; //centers->data.fl[i * 2]; + float anchor_h = anchors_data.centers.vals[i][1]; //centers->data.fl[i * 2 + 1]; + if(width > 32) sprintf(buff, "%3.0f,%3.0f", anchor_w, anchor_h); + else sprintf(buff, "%2.4f,%2.4f", anchor_w, anchor_h); printf("%s", buff); fwrite(buff, sizeof(char), strlen(buff), fw); if (i + 1 < num_of_clusters) { @@ -1135,6 +1123,27 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int } if (show) { +#ifdef OPENCV + CvMat* labels = cvCreateMat(number_of_boxes, 1, CV_32SC1); + CvMat* points = cvCreateMat(number_of_boxes, 2, CV_32FC1); + CvMat* centers = cvCreateMat(num_of_clusters, 2, CV_32FC1); + + for (i = 0; i < number_of_boxes; ++i) { + points->data.fl[i * 2] = rel_width_height_array[i * 2]; + points->data.fl[i * 2 + 1] = rel_width_height_array[i * 2 + 1]; + //cvSet1D(points, i * 2, cvScalar(rel_width_height_array[i * 2], 0, 0, 0)); + //cvSet1D(points, i * 2 + 1, cvScalar(rel_width_height_array[i * 2 + 1], 0, 0, 0)); + } + + for (i = 0; i < num_of_clusters; ++i) { + centers->data.fl[i * 2] = anchors_data.centers.vals[i][0]; + centers->data.fl[i * 2 + 1] = anchors_data.centers.vals[i][1]; + } + + for (i = 0; i < number_of_boxes; ++i) { + labels->data.i[i] = anchors_data.assignments[i]; + } + size_t img_size = 700; IplImage* img = cvCreateImage(cvSize(img_size, img_size), 8, 3); cvZero(img); @@ -1161,18 +1170,20 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int cvWaitKey(0); cvReleaseImage(&img); cvDestroyAllWindows(); - } - - free(rel_width_height_array); - cvReleaseMat(&points); - cvReleaseMat(¢ers); - cvReleaseMat(&labels); -} -#else -void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) { - printf(" k-means++ can't be used without OpenCV, because there is used cvKMeans2 implementation \n"); -} + cvReleaseMat(&labels); + cvReleaseMat(&points); + cvReleaseMat(¢ers); #endif // OPENCV + } + free(rel_width_height_array); + + getchar(); +} +//#else +//void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) { +// printf(" k-means++ can't be used without OpenCV, because there is used cvKMeans2 implementation \n"); +//} +//#endif // OPENCV void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, int dont_show, int ext_output, int save_labels) diff --git a/src/matrix.c b/src/matrix.c index 08e6109b..e0eb61e6 100644 --- a/src/matrix.c +++ b/src/matrix.c @@ -179,3 +179,140 @@ void print_matrix(matrix m) for(j = 0; j < 16*m.cols-1; ++j) printf(" "); printf("__|\n"); } + + +matrix make_matrix(int rows, int cols); + +void copy(float *x, float *y, int n); +float dist(float *x, float *y, int n); +int *sample(int n); + +int closest_center(float *datum, matrix centers) +{ + int j; + int best = 0; + float best_dist = dist(datum, centers.vals[best], centers.cols); + for (j = 0; j < centers.rows; ++j) { + float new_dist = dist(datum, centers.vals[j], centers.cols); + if (new_dist < best_dist) { + best_dist = new_dist; + best = j; + } + } + return best; +} + +float dist_to_closest_center(float *datum, matrix centers) +{ + int ci = closest_center(datum, centers); + return dist(datum, centers.vals[ci], centers.cols); +} + +int kmeans_expectation(matrix data, int *assignments, matrix centers) +{ + int i; + int converged = 1; + for (i = 0; i < data.rows; ++i) { + int closest = closest_center(data.vals[i], centers); + if (closest != assignments[i]) converged = 0; + assignments[i] = closest; + } + return converged; +} + +void kmeans_maximization(matrix data, int *assignments, matrix centers) +{ + int i, j; + int *counts = calloc(centers.rows, sizeof(int)); + for (i = 0; i < centers.rows; ++i) { + for (j = 0; j < centers.cols; ++j) centers.vals[i][j] = 0; + } + for (i = 0; i < data.rows; ++i) { + ++counts[assignments[i]]; + for (j = 0; j < data.cols; ++j) { + centers.vals[assignments[i]][j] += data.vals[i][j]; + } + } + for (i = 0; i < centers.rows; ++i) { + if (counts[i]) { + for (j = 0; j < centers.cols; ++j) { + centers.vals[i][j] /= counts[i]; + } + } + } +} + + + +void random_centers(matrix data, matrix centers) { + int i, j; + int *s = sample(data.rows); + for (i = 0; i < centers.rows; ++i) { + copy(data.vals[s[i]], centers.vals[i], data.cols); + } + free(s); +} + +int *sample(int n) +{ + int i; + int *s = calloc(n, sizeof(int)); + for (i = 0; i < n; ++i) s[i] = i; + for (i = n - 1; i >= 0; --i) { + int swap = s[i]; + int index = rand() % (i + 1); + s[i] = s[index]; + s[index] = swap; + } + return s; +} + +float dist(float *x, float *y, int n) +{ + int i; + //printf(" x0 = %f, x1 = %f, y0 = %f, y1 = %f \n", x[0], x[1], y[0], y[1]); + float mw = (x[0] < y[0]) ? x[0] : y[0]; + float mh = (x[1] < y[1]) ? x[1] : y[1]; + float inter = mw*mh; + float sum = x[0] * x[1] + y[0] * y[1]; + float un = sum - inter; + float iou = inter / un; + return 1 - iou; +} + +void copy(float *x, float *y, int n) +{ + int i; + for (i = 0; i < n; ++i) y[i] = x[i]; +} + +model do_kmeans(matrix data, int k) +{ + matrix centers = make_matrix(k, data.cols); + int *assignments = calloc(data.rows, sizeof(int)); + //smart_centers(data, centers); + random_centers(data, centers); // IoU = 67.31% after kmeans + // + /* + // IoU = 63.29%, anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 + centers.vals[0][0] = 10; centers.vals[0][1] = 13; + centers.vals[1][0] = 16; centers.vals[1][1] = 30; + centers.vals[2][0] = 33; centers.vals[2][1] = 23; + centers.vals[3][0] = 30; centers.vals[3][1] = 61; + centers.vals[4][0] = 62; centers.vals[4][1] = 45; + centers.vals[5][0] = 59; centers.vals[5][1] = 119; + centers.vals[6][0] = 116; centers.vals[6][1] = 90; + centers.vals[7][0] = 156; centers.vals[7][1] = 198; + centers.vals[8][0] = 373; centers.vals[8][1] = 326; + */ + + // range centers [min - max] using exp graph or Pyth example + if (k == 1) kmeans_maximization(data, assignments, centers); + while (!kmeans_expectation(data, assignments, centers)) { + kmeans_maximization(data, assignments, centers); + } + model m; + m.assignments = assignments; + m.centers = centers; + return m; +} diff --git a/src/matrix.h b/src/matrix.h index 641b5965..1226de2b 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -5,6 +5,13 @@ typedef struct matrix{ float **vals; } matrix; +typedef struct { + int *assignments; + matrix centers; +} model; + + +model do_kmeans(matrix data, int k); matrix make_matrix(int rows, int cols); void free_matrix(matrix m); void print_matrix(matrix m);