calc_anchors can be used without OpenCV, and it uses (1-IoU) instead of Euclidean distance

AlexeyAB
2018-12-11 15:59:23 +03:00
parent fb1ee79576
commit e9226be3ed
5 changed files with 2161 additions and 70 deletions
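Anchor clustering in calc_anchors() now runs on the k-means implementation added to matrix.c (do_kmeans()); OpenCV is needed only for the optional -show visualization. The assignment distance is the YOLOv2-style one, computed on (width, height) pairs as if the two boxes shared a corner:

    d(box, anchor) = 1 - IoU(box, anchor)
                   = 1 - min(wb,wa)*min(hb,ha) / (wb*hb + wa*ha - min(wb,wa)*min(hb,ha))

This is exactly what the dist() function in the matrix.c diff below computes.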

@@ -0,0 +1,968 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=16
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 12,13,14
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 9,10,11
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
###############
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 11
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 4
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=64
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=64
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=64
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

968  cfg/yolov3_5l.cfg  Normal file

@@ -0,0 +1,968 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=16
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 12,13,14
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 9,10,11
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
###############
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 11
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 4
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=64
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=64
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=64
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=15
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
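Both new cfg files are five-scale YOLOv3 variants: the five [yolo] heads use mask groups 12,13,14 / 9,10,11 / 6,7,8 / 3,4,5 / 0,1,2, which together cover all num=15 anchor pairs, and the 1x1 [convolutional] layer in front of each head has filters = (classes + 5) * 3 = (80 + 5) * 3 = 255, i.e. 3 masks per head times 85 values per box (4 coordinates, 1 objectness score, 80 class scores).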


@@ -964,7 +964,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
return mean_average_precision;
}
#ifdef OPENCV
//#ifdef OPENCV
typedef struct {
float w, h;
} anchors_t;
@@ -979,6 +979,17 @@ int anchors_comparator(const void *pa, const void *pb)
return 0;
}
int anchors_data_comparator(const float **pa, const float **pb)
{
float *a = (float *)*pa;
float *b = (float *)*pb;
float diff = b[0]*b[1] - a[0]*a[1];
if (diff < 0) return 1;
else if (diff > 0) return -1;
return 0;
}
void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show)
{
printf("\n num_of_clusters = %d, width = %d, height = %d \n", num_of_clusters, width, height);
@@ -991,12 +1002,14 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
//float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 };
float *rel_width_height_array = calloc(1000, sizeof(float));
list *options = read_data_cfg(datacfg);
char *train_images = option_find_str(options, "train", "data/train.list");
list *plist = get_paths(train_images);
int number_of_images = plist->size;
char **paths = (char **)list_to_array(plist);
srand(time(0));
int number_of_boxes = 0;
printf(" read labels from %d images \n", number_of_images);
@@ -1030,85 +1043,57 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
}
}
printf("\n all loaded. \n");
printf("\n calculating k-means++ ...");
CvMat* points = cvCreateMat(number_of_boxes, 2, CV_32FC1);
CvMat* centers = cvCreateMat(num_of_clusters, 2, CV_32FC1);
CvMat* labels = cvCreateMat(number_of_boxes, 1, CV_32SC1);
matrix boxes_data;
model anchors_data;
boxes_data = make_matrix(number_of_boxes, 2);
printf("\n");
for (i = 0; i < number_of_boxes; ++i) {
points->data.fl[i * 2] = rel_width_height_array[i * 2];
points->data.fl[i * 2 + 1] = rel_width_height_array[i * 2 + 1];
//cvSet1D(points, i * 2, cvScalar(rel_width_height_array[i * 2], 0, 0, 0));
//cvSet1D(points, i * 2 + 1, cvScalar(rel_width_height_array[i * 2 + 1], 0, 0, 0));
float w = boxes_data.vals[i][0] = rel_width_height_array[i * 2];
float h = boxes_data.vals[i][1] = rel_width_height_array[i * 2 + 1];
//if (w > 410 || h > 410) printf("i:%d, w = %f, h = %f \n", i, w, h);
}
// Is used: distance(box, centroid) = 1 - IoU(box, centroid)
const int attemps = 10;
double compactness;
// K-means
anchors_data = do_kmeans(boxes_data, num_of_clusters);
enum {
KMEANS_RANDOM_CENTERS = 0,
KMEANS_USE_INITIAL_LABELS = 1,
KMEANS_PP_CENTERS = 2
};
printf("\n calculating k-means++ ...");
// Should be used: distance(box, centroid) = 1 - IoU(box, centroid)
cvKMeans2(points, num_of_clusters, labels,
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10000, 0), attemps,
0, KMEANS_PP_CENTERS,
centers, &compactness);
// sort anchors
qsort(centers->data.fl, num_of_clusters, 2*sizeof(float), anchors_comparator);
//orig 2.0 anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
//float orig_anch[] = { 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 };
// worse than ours (even for 19x19 final size - for input size 608x608)
//orig anchors = 1.3221,1.73145, 3.19275,4.00944, 5.05587,8.09892, 9.47112,4.84053, 11.2364,10.0071
//float orig_anch[] = { 1.3221,1.73145, 3.19275,4.00944, 5.05587,8.09892, 9.47112,4.84053, 11.2364,10.0071 };
// orig (IoU=59.90%) better than ours (59.75%)
qsort(anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), anchors_data_comparator);
//gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66
//float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 };
// ours: anchors = 9.3813,6.0095, 3.3999,5.3505, 10.9476,11.1992, 5.0161,9.8314, 1.5003,2.1595
//float orig_anch[] = { 9.3813,6.0095, 3.3999,5.3505, 10.9476,11.1992, 5.0161,9.8314, 1.5003,2.1595 };
//for (i = 0; i < num_of_clusters * 2; ++i) centers->data.fl[i] = orig_anch[i];
//for (i = 0; i < number_of_boxes; ++i)
// printf("%2.2f,%2.2f, ", points->data.fl[i * 2], points->data.fl[i * 2 + 1]);
printf("\n");
float avg_iou = 0;
for (i = 0; i < number_of_boxes; ++i) {
float box_w = points->data.fl[i * 2];
float box_h = points->data.fl[i * 2 + 1];
float box_w = rel_width_height_array[i * 2]; //points->data.fl[i * 2];
float box_h = rel_width_height_array[i * 2 + 1]; //points->data.fl[i * 2 + 1];
//int cluster_idx = labels->data.i[i];
int cluster_idx = 0;
float min_dist = FLT_MAX;
float best_iou = 0;
for (j = 0; j < num_of_clusters; ++j) {
float anchor_w = centers->data.fl[j * 2];
float anchor_h = centers->data.fl[j * 2 + 1];
float w_diff = anchor_w - box_w;
float h_diff = anchor_h - box_h;
float distance = sqrt(w_diff*w_diff + h_diff*h_diff);
if (distance < min_dist) min_dist = distance, cluster_idx = j;
}
float anchor_w = centers->data.fl[cluster_idx * 2];
float anchor_h = centers->data.fl[cluster_idx * 2 + 1];
float anchor_w = anchors_data.centers.vals[j][0]; // centers->data.fl[j * 2];
float anchor_h = anchors_data.centers.vals[j][1]; // centers->data.fl[j * 2 + 1];
float min_w = (box_w < anchor_w) ? box_w : anchor_w;
float min_h = (box_h < anchor_h) ? box_h : anchor_h;
float box_intersect = min_w*min_h;
float box_union = box_w*box_h + anchor_w*anchor_h - box_intersect;
float iou = box_intersect / box_union;
if (iou > 1 || iou < 0) { // || box_w > width || box_h > height) {
printf(" Wrong label: i = %d, box_w = %d, box_h = %d, anchor_w = %d, anchor_h = %d, iou = %f \n",
i, box_w, box_h, anchor_w, anchor_h, iou);
float distance = 1 - iou;
if (distance < min_dist) min_dist = distance, cluster_idx = j, best_iou = iou;
}
else avg_iou += iou;
float anchor_w = anchors_data.centers.vals[cluster_idx][0]; //centers->data.fl[cluster_idx * 2];
float anchor_h = anchors_data.centers.vals[cluster_idx][1]; //centers->data.fl[cluster_idx * 2 + 1];
if (best_iou > 1 || best_iou < 0) { // || box_w > width || box_h > height) {
printf(" Wrong label: i = %d, box_w = %d, box_h = %d, anchor_w = %d, anchor_h = %d, iou = %f \n",
i, box_w, box_h, anchor_w, anchor_h, best_iou);
}
else avg_iou += best_iou;
}
avg_iou = 100 * avg_iou / number_of_boxes;
printf("\n avg IoU = %2.2f %% \n", avg_iou);
@@ -1119,7 +1104,10 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
printf("\nSaving anchors to the file: anchors.txt \n");
printf("anchors = ");
for (i = 0; i < num_of_clusters; ++i) {
sprintf(buff, "%2.4f,%2.4f", centers->data.fl[i * 2], centers->data.fl[i * 2 + 1]);
float anchor_w = anchors_data.centers.vals[i][0]; //centers->data.fl[i * 2];
float anchor_h = anchors_data.centers.vals[i][1]; //centers->data.fl[i * 2 + 1];
if(width > 32) sprintf(buff, "%3.0f,%3.0f", anchor_w, anchor_h);
else sprintf(buff, "%2.4f,%2.4f", anchor_w, anchor_h);
printf("%s", buff);
fwrite(buff, sizeof(char), strlen(buff), fw);
if (i + 1 < num_of_clusters) {
@@ -1135,6 +1123,27 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
}
if (show) {
#ifdef OPENCV
CvMat* labels = cvCreateMat(number_of_boxes, 1, CV_32SC1);
CvMat* points = cvCreateMat(number_of_boxes, 2, CV_32FC1);
CvMat* centers = cvCreateMat(num_of_clusters, 2, CV_32FC1);
for (i = 0; i < number_of_boxes; ++i) {
points->data.fl[i * 2] = rel_width_height_array[i * 2];
points->data.fl[i * 2 + 1] = rel_width_height_array[i * 2 + 1];
//cvSet1D(points, i * 2, cvScalar(rel_width_height_array[i * 2], 0, 0, 0));
//cvSet1D(points, i * 2 + 1, cvScalar(rel_width_height_array[i * 2 + 1], 0, 0, 0));
}
for (i = 0; i < num_of_clusters; ++i) {
centers->data.fl[i * 2] = anchors_data.centers.vals[i][0];
centers->data.fl[i * 2 + 1] = anchors_data.centers.vals[i][1];
}
for (i = 0; i < number_of_boxes; ++i) {
labels->data.i[i] = anchors_data.assignments[i];
}
size_t img_size = 700;
IplImage* img = cvCreateImage(cvSize(img_size, img_size), 8, 3);
cvZero(img);
@@ -1161,18 +1170,20 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
cvWaitKey(0);
cvReleaseImage(&img);
cvDestroyAllWindows();
}
free(rel_width_height_array);
cvReleaseMat(&labels);
cvReleaseMat(&points);
cvReleaseMat(&centers);
cvReleaseMat(&labels);
}
#else
void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) {
printf(" k-means++ can't be used without OpenCV, because there is used cvKMeans2 implementation \n");
}
#endif // OPENCV
}
free(rel_width_height_array);
getchar();
}
//#else
//void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) {
// printf(" k-means++ can't be used without OpenCV, because there is used cvKMeans2 implementation \n");
//}
//#endif // OPENCV
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh,
float hier_thresh, int dont_show, int ext_output, int save_labels)
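With this change the anchors can be recomputed on a build without OpenCV. A typical invocation (flag names assumed from the usual darknet command line; add -show to also draw the clusters, which still requires OpenCV):

    darknet detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416

The resulting anchors are printed and saved to anchors.txt, sorted from smallest to largest area; when width > 32 they are written as whole pixels, otherwise with four decimal places (see the sprintf branch above).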


@@ -179,3 +179,140 @@ void print_matrix(matrix m)
for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
printf("__|\n");
}
matrix make_matrix(int rows, int cols);
void copy(float *x, float *y, int n);
float dist(float *x, float *y, int n);
int *sample(int n);
int closest_center(float *datum, matrix centers)
{
int j;
int best = 0;
float best_dist = dist(datum, centers.vals[best], centers.cols);
for (j = 0; j < centers.rows; ++j) {
float new_dist = dist(datum, centers.vals[j], centers.cols);
if (new_dist < best_dist) {
best_dist = new_dist;
best = j;
}
}
return best;
}
float dist_to_closest_center(float *datum, matrix centers)
{
int ci = closest_center(datum, centers);
return dist(datum, centers.vals[ci], centers.cols);
}
int kmeans_expectation(matrix data, int *assignments, matrix centers)
{
int i;
int converged = 1;
for (i = 0; i < data.rows; ++i) {
int closest = closest_center(data.vals[i], centers);
if (closest != assignments[i]) converged = 0;
assignments[i] = closest;
}
return converged;
}
void kmeans_maximization(matrix data, int *assignments, matrix centers)
{
int i, j;
int *counts = calloc(centers.rows, sizeof(int));
// recompute each center as the mean of the boxes currently assigned to it
for (i = 0; i < centers.rows; ++i) {
for (j = 0; j < centers.cols; ++j) centers.vals[i][j] = 0;
}
for (i = 0; i < data.rows; ++i) {
++counts[assignments[i]];
for (j = 0; j < data.cols; ++j) {
centers.vals[assignments[i]][j] += data.vals[i][j];
}
}
for (i = 0; i < centers.rows; ++i) {
if (counts[i]) {
for (j = 0; j < centers.cols; ++j) {
centers.vals[i][j] /= counts[i];
}
}
}
free(counts);
}
void random_centers(matrix data, matrix centers) {
int i, j;
int *s = sample(data.rows);
for (i = 0; i < centers.rows; ++i) {
copy(data.vals[s[i]], centers.vals[i], data.cols);
}
free(s);
}
int *sample(int n)
{
int i;
int *s = calloc(n, sizeof(int));
for (i = 0; i < n; ++i) s[i] = i;
for (i = n - 1; i >= 0; --i) {
int swap = s[i];
int index = rand() % (i + 1);
s[i] = s[index];
s[index] = swap;
}
return s;
}
float dist(float *x, float *y, int n)
{
// boxes are given as (width, height) pairs and compared as if they shared
// a corner, so distance(box, centroid) = 1 - IoU(box, centroid);
// n is unused because there are always exactly 2 columns
//printf(" x0 = %f, x1 = %f, y0 = %f, y1 = %f \n", x[0], x[1], y[0], y[1]);
float mw = (x[0] < y[0]) ? x[0] : y[0];
float mh = (x[1] < y[1]) ? x[1] : y[1];
float inter = mw*mh;
float sum = x[0] * x[1] + y[0] * y[1];
float un = sum - inter;
float iou = inter / un;
return 1 - iou;
}
void copy(float *x, float *y, int n)
{
int i;
for (i = 0; i < n; ++i) y[i] = x[i];
}
model do_kmeans(matrix data, int k)
{
matrix centers = make_matrix(k, data.cols);
int *assignments = calloc(data.rows, sizeof(int));
//smart_centers(data, centers);
random_centers(data, centers); // IoU = 67.31% after kmeans
//
/*
// IoU = 63.29%, anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
centers.vals[0][0] = 10; centers.vals[0][1] = 13;
centers.vals[1][0] = 16; centers.vals[1][1] = 30;
centers.vals[2][0] = 33; centers.vals[2][1] = 23;
centers.vals[3][0] = 30; centers.vals[3][1] = 61;
centers.vals[4][0] = 62; centers.vals[4][1] = 45;
centers.vals[5][0] = 59; centers.vals[5][1] = 119;
centers.vals[6][0] = 116; centers.vals[6][1] = 90;
centers.vals[7][0] = 156; centers.vals[7][1] = 198;
centers.vals[8][0] = 373; centers.vals[8][1] = 326;
*/
// range centers [min - max] using exp graph or Pyth example
if (k == 1) kmeans_maximization(data, assignments, centers);
// alternate E-step (reassign boxes to the nearest center by 1-IoU distance)
// and M-step (recompute centers) until the assignments stop changing
while (!kmeans_expectation(data, assignments, centers)) {
kmeans_maximization(data, assignments, centers);
}
model m;
m.assignments = assignments;
m.centers = centers;
return m;
}


@@ -5,6 +5,13 @@ typedef struct matrix{
float **vals;
} matrix;
typedef struct {
int *assignments;
matrix centers;
} model;
model do_kmeans(matrix data, int k);
matrix make_matrix(int rows, int cols);
void free_matrix(matrix m);
void print_matrix(matrix m);
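A minimal sketch of using the new clustering API declared above directly (compiled together with matrix.c); the box list and the cluster count are made up for illustration:

#include <stdio.h>
#include <stdlib.h>
#include "matrix.h"

int main(void)
{
    /* a handful of box sizes (w,h) in pixels - hypothetical data */
    float wh[6][2] = { {10,13}, {16,30}, {33,23}, {30,61}, {62,45}, {59,119} };
    matrix boxes = make_matrix(6, 2);
    int i;
    for (i = 0; i < 6; ++i) {
        boxes.vals[i][0] = wh[i][0];
        boxes.vals[i][1] = wh[i][1];
    }

    /* cluster into 3 anchors using the 1-IoU distance from dist() */
    model m = do_kmeans(boxes, 3);
    for (i = 0; i < 3; ++i) {
        printf("anchor %d: %.0f,%.0f\n", i, m.centers.vals[i][0], m.centers.vals[i][1]);
    }

    free(m.assignments);
    free_matrix(m.centers);
    free_matrix(boxes);
    return 0;
}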