resnet that works

2023-08-10 21:13:14 +03:00 · 2017-10-02 15:17:48 -07:00
parent 1b001a7f58
commit 62b781af4d
18 changed files with 294 additions and 1311 deletions
--- a/4
+++ b/4
@ -4,11 +4,11 @@ OPENCV=0
 OPENMP=0
 DEBUG=0
-ARCH= -gencode arch=compute_20,code=[sm_20,sm_21] \
+ARCH= -gencode arch=compute_30,code=sm_30 \
      -gencode arch=compute_30,code=sm_30 \
      -gencode arch=compute_35,code=sm_35 \
      -gencode arch=compute_50,code=[sm_50,compute_50] \
      -gencode arch=compute_52,code=[sm_52,compute_52]
 #      -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated?
 # This is what I use, uncomment if you know your arch and want to specify
 # ARCH= -gencode arch=compute_52,code=compute_52
--- a/cfg/msr_34.cfg
+++ b/cfg/msr_34.cfg
@ -1,366 +0,0 @@
 [net]
 batch=128
 subdivisions=1
 height=256
 width=256
 channels=3
 momentum=0.9
 decay=0.0005
 learning_rate=0.1
 policy=poly
 power=4
 max_batches=500000
 [crop]
 crop_height=224
 crop_width=224
 flip=1
 saturation=1
 exposure=1
 angle=0
 [convolutional]
 batch_normalize=1
 filters=64
 size=7
 stride=2
 pad=1
 activation=leaky
 [maxpool]
 size=3
 stride=2
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from = -3
 [avgpool]
 [connected]
 output=1000
 activation=leaky
 [softmax]
 groups=1
 [cost]
 type=sse
--- a/cfg/msr_50.cfg
+++ b/cfg/msr_50.cfg
@ -1,558 +0,0 @@
 [net]
 batch=128
 subdivisions=8
 height=256
 width=256
 channels=3
 momentum=0.9
 decay=0.0001
 learning_rate=0.05
 policy=poly
 power=4
 max_batches=500000
 [crop]
 crop_height=224
 crop_width=224
 flip=1
 saturation=1
 exposure=1
 angle=0
 ##### Conv 1 #####
 [convolutional]
 batch_normalize=1
 filters=64
 size=7
 stride=2
 pad=1
 activation=leaky
 [maxpool]
 size=3
 stride=2
 ##### Conv 2_x #####
 [convolutional]
 batch_normalize=1
 filters=64
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=linear
 [route]
 layers=-4
 [convolutional]
 batch_normalize=1
 size=1
 stride=1
 pad=1
 activation=linear
 filters=256
 [shortcut]
 from = -3
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 ##### Conv 3_x #####
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=linear
 [route]
 layers=-4
 [convolutional]
 batch_normalize=1
 size=1
 stride=2
 pad=1
 activation=linear
 filters=512
 [shortcut]
 from = -3
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 ##### Conv 4_x #####
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=1
 stride=1
 pad=1
 activation=linear
 [route]
 layers=-4
 [convolutional]
 batch_normalize=1
 size=1
 stride=2
 pad=1
 activation=linear
 filters=1024
 [shortcut]
 from = -3
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 ##### Conv 5_x #####
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=2048
 size=1
 stride=1
 pad=1
 activation=linear
 [route]
 layers=-4
 [convolutional]
 batch_normalize=1
 size=1
 stride=2
 pad=1
 activation=linear
 filters=2048
 [shortcut]
 from = -3
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=2048
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=2048
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [avgpool]
 [connected]
 output=1000
 activation=leaky
 [softmax]
 groups=1
 [cost]
 type=sse
--- a/cfg/resnet152.cfg
+++ b/cfg/resnet152.cfg
@ -1,26 +1,30 @@
 [net]
-batch=128
+# Training
-subdivisions=8
+# batch=128
 # subdivisions=8
 # Testing
 batch=1
 subdivisions=1
 height=256
 width=256
 max_crop=448
 channels=3
 momentum=0.9
-decay=0.0001
+decay=0.0005
 burn_in=1000
 learning_rate=0.1
 policy=poly
 power=4
-max_batches=500000
+max_batches=1600000
-[crop]
+angle=7
-crop_height=224
+hue=.1
-crop_width=224
+saturation=.75
-flip=1
+exposure=.75
-saturation=1
+aspect=.75
 exposure=1
 angle=0
 ##### Conv 1 #####
 [convolutional]
 batch_normalize=1
@ -31,13 +35,9 @@ pad=1
 activation=leaky
 [maxpool]
-size=3
+size=2
 stride=2
 ##### Conv 2_x #####
 [convolutional]
 batch_normalize=1
 filters=64
@ -62,19 +62,8 @@ stride=1
 pad=1
 activation=linear
 [route]
 layers=-4
 [convolutional]
 batch_normalize=1
 size=1
 stride=1
 pad=1
 activation=linear
 filters=256
 [shortcut]
-from = -3
+from=-4
 activation=leaky
 [convolutional]
@ -102,8 +91,7 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
@ -131,13 +119,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 ##### Conv 3_x #####
 [convolutional]
 batch_normalize=1
 filters=128
@ -162,23 +146,10 @@ stride=1
 pad=1
 activation=linear
 [route]
 layers=-4
 [convolutional]
 batch_normalize=1
 size=1
 stride=2
 pad=1
 activation=linear
 filters=512
 [shortcut]
-from = -3
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
@ -204,11 +175,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
@ -234,11 +203,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
@ -264,11 +231,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
@ -294,11 +259,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
@ -324,11 +287,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
@ -354,11 +315,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
@ -384,14 +343,11 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
-
+# Conv 4
 ##### Conv 4_x #####
 [convolutional]
 batch_normalize=1
 filters=256
@ -416,23 +372,10 @@ stride=1
 pad=1
 activation=linear
 [route]
 layers=-4
 [convolutional]
 batch_normalize=1
 size=1
 stride=2
 pad=1
 activation=linear
 filters=1024
 [shortcut]
-from = -3
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -458,11 +401,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -488,11 +429,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -518,11 +457,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -548,11 +485,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -578,11 +513,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -608,11 +541,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -638,11 +569,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -668,11 +597,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -698,11 +625,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -728,11 +653,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -758,11 +681,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -788,11 +709,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -818,11 +737,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -848,11 +765,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -878,11 +793,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -908,11 +821,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -938,11 +849,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -968,11 +877,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -998,11 +905,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1028,11 +933,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1058,11 +961,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1088,11 +989,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1118,11 +1017,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1148,11 +1045,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1178,11 +1073,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1208,11 +1101,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1238,11 +1129,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1268,11 +1157,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1298,11 +1185,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1328,11 +1213,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1358,11 +1241,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1388,11 +1269,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1418,11 +1297,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1448,11 +1325,9 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
@ -1478,13 +1353,10 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
-
+#Conv 5
 ##### Conv 5_x #####
 [convolutional]
 batch_normalize=1
 filters=512
@ -1509,50 +1381,8 @@ stride=1
 pad=1
 activation=linear
 [route]
 layers=-4
 [convolutional]
 batch_normalize=1
 size=1
 stride=2
 pad=1
 activation=linear
 filters=2048
 [shortcut]
-from = -3
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=2048
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from = -4
 activation=leaky
 [convolutional]
@ -1580,16 +1410,51 @@ pad=1
 activation=linear
 [shortcut]
-from = -4
+from=-4
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=2048
 size=1
 stride=1
 pad=1
 activation=linear
 [shortcut]
 from=-4
 activation=leaky
 [convolutional]
 filters=1000
 size=1
 stride=1
 pad=1
 activation=linear
 [avgpool]
 [connected]
 output=1000
 activation=leaky
 [softmax]
 groups=1
--- a/examples/classifier.c
+++ b/examples/classifier.c
@ -58,7 +58,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
-    args.threads = 64;
+    args.threads = 32;
    args.hierarchy = net.hierarchy;
    args.min = net.min_crop;
@ -123,6 +123,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    char buff[256];
    sprintf(buff, "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);
    pthread_join(load_thread, 0);
    free_network(net);
    free_ptrs((void**)labels, classes);
--- a/examples/darknet.c
+++ b/examples/darknet.c
@ -83,27 +83,8 @@ void average(int argc, char *argv[])
    save_weights(sum, outfile);
 }
-void speed(char *cfgfile, int tics)
+long numops(network net)
 {
    if (tics == 0) tics = 1000;
    network net = parse_network_cfg(cfgfile);
    set_batch_network(&net, 1);
    int i;
    double time=what_time_is_it_now();
    image im = make_image(net.w, net.h, net.c*net.batch);
    for(i = 0; i < tics; ++i){
        network_predict(net, im.data);
    }
    double t = what_time_is_it_now() - time;
    printf("\n%d evals, %f Seconds\n", tics, t);
    printf("Speed: %f sec/eval\n", t/tics);
    printf("Speed: %f Hz\n", tics/t);
 }
 void operations(char *cfgfile)
 {
    gpu_index = -1;
    network net = parse_network_cfg(cfgfile);
    int i;
    long ops = 0;
    for(i = 0; i < net.n; ++i){
@ -134,6 +115,34 @@ void operations(char *cfgfile)
            ops += 2l * l.wo->inputs * l.wo->outputs;
        }
    }
    return ops;
 }
 /*
  * Time `tics` forward passes of the network parsed from cfgfile and print
  * the elapsed time, the operation count reported by numops(), and the
  * per-evaluation figures derived from them.
  *
  * cfgfile: path handed to parse_network_cfg().
  * tics:    number of network_predict() calls to time; 0 selects 1000.
  */
 void speed(char *cfgfile, int tics)
 {
    int pass;
    if (tics == 0) {
        tics = 1000;
    }
    network net = parse_network_cfg(cfgfile);
    set_batch_network(&net, 1);
    /* The timed region begins here, so it also covers creating the input image. */
    double start = what_time_is_it_now();
    image input = make_image(net.w, net.h, net.c*net.batch);
    pass = 0;
    while (pass < tics) {
        network_predict(net, input.data);
        ++pass;
    }
    double elapsed = what_time_is_it_now() - start;
    long ops = numops(net);
    /* Operation count scaled to billions, shared by the two lines below. */
    double billions = (float)ops/1000000000.;
    printf("\n%d evals, %f Seconds\n", tics, elapsed);
    printf("Floating Point Operations: %.2f Bn\n", billions);
    printf("FLOPS: %.2f Bn\n", billions*tics/elapsed);
    printf("Speed: %f sec/eval\n", elapsed/tics);
    printf("Speed: %f Hz\n", tics/elapsed);
 }
 /*
  * Print the operation count numops() reports for the network parsed from
  * cfgfile: first as a raw integer, then scaled to billions.
  * Sets gpu_index to -1 before the network is parsed.
  */
 void operations(char *cfgfile)
 {
    gpu_index = -1;
    long total = numops(parse_network_cfg(cfgfile));
    double billions = (float)total/1000000000.;
    printf("Floating Point Operations: %ld\n", total);
    printf("Floating Point Operations: %.2f Bn\n", billions);
 }
--- a/examples/detector.c
+++ b/examples/detector.c
@ -52,10 +52,10 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
    args.d = &buffer;
    args.type = DETECTION_DATA;
    //args.type = INSTANCE_DATA;
-    args.threads = 8;
+    args.threads = 64;
    pthread_t load_thread = load_data(args);
-    clock_t time;
+    double time;
    int count = 0;
    //while(i*imgs < N*120){
    while(get_current_batch(net) < net.max_batches){
@ -78,7 +78,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
            }
            net = nets[0];
        }
-        time=clock();
+        time=what_time_is_it_now();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);
@ -107,9 +107,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
        }
        */
-        printf("Loaded: %lf seconds\n", sec(clock()-time));
+        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
-        time=clock();
+        time=what_time_is_it_now();
        float loss = 0;
 #ifdef GPU
        if(ngpus == 1){
@ -124,7 +124,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
        avg_loss = avg_loss*.9 + loss*.1;
        i = get_current_batch(net);
-        printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
+        printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs);
        if(i%100==0){
 #ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
@ -313,7 +313,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
        args.resized = &buf_resized[t];
        thr[t] = load_data_in_thread(args);
    }
-    time_t start = time(0);
+    double start = what_time_is_it_now();
    for(i = nthreads; i < m+nthreads; i += nthreads){
        fprintf(stderr, "%d\n", i);
        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
@ -359,7 +359,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
        fprintf(fp, "\n]\n");
        fclose(fp);
    }
-    fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
+    fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start);
 }
@ -447,7 +447,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
        args.resized = &buf_resized[t];
        thr[t] = load_data_in_thread(args);
    }
-    time_t start = time(0);
+    double start = what_time_is_it_now();
    for(i = nthreads; i < m+nthreads; i += nthreads){
        fprintf(stderr, "%d\n", i);
        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
@ -490,7 +490,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
        fprintf(fp, "\n]\n");
        fclose(fp);
    }
-    fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
+    fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start);
 }
 void validate_detector_recall(char *cfgfile, char *weightfile)
--- a/examples/rnn.c
+++ b/examples/rnn.c
@ -52,6 +52,7 @@ char **read_tokens(char *filename, size_t *read)
    return d;
 }
 float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps)
 {
    float *x = calloc(batch * steps * characters, sizeof(float));
@ -78,6 +79,35 @@ float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size
    return p;
 }
 /*
  * Build one-hot input/target buffers for a batch of paired sequences.
  *
  * For each of the `batch` slots a pair index is drawn with rand()%n; for
  * each of the `steps` positions j, byte j of source[index] selects the hot
  * entry of the input and byte j of dest[index] selects the hot entry of
  * the target. Both buffers hold batch*steps*characters floats and are
  * indexed as (j*batch + i)*characters + byte.
  *
  * Every byte is validated BEFORE it is used as an index: a zero byte, or a
  * byte that is >= characters, is reported with error("Bad char") rather
  * than being written outside the allocation. `len` is not used.
  *
  * Returns a float_pair whose x and y members are the calloc'd buffers.
  * NOTE(review): presumably the caller frees x and y, and error() does not
  * return -- confirm against their definitions.
  */
 float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps)
 {
    int i,j;
    float *x = calloc(batch * steps * characters, sizeof(float));
    float *y = calloc(batch * steps * characters, sizeof(float));
    for(i = 0; i < batch; ++i){
        int index = rand()%n;
        for(j = 0; j < steps; ++j){
            unsigned char curr = source[index][j];
            unsigned char next = dest[index][j];
            /* An unsigned char can never exceed 255, so the meaningful
               bounds are "non-zero" and "fits in the one-hot row". */
            if(curr == 0 || next == 0 || curr >= characters || next >= characters){
                error("Bad char");
            }
            x[(j*batch + i)*characters + curr] = 1;
            y[(j*batch + i)*characters + next] = 1;
        }
    }
    float_pair p;
    p.x = x;
    p.y = y;
    return p;
 }
 float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps)
 {
    float *x = calloc(batch * steps * characters, sizeof(float));
--- a/examples/segmenter.c
+++ b/examples/segmenter.c
@ -211,7 +211,7 @@ void demo_segmenter(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
        image in = get_image_from_stream(cap);
        image in_s = letterbox_image(in, net.w, net.h);
-        float *predictions = network_predict(net, in_s.data);
+        network_predict(net, in_s.data);
        printf("\033[2J");
        printf("\033[1;1H");
--- a/src/activation_kernels.cu
+++ b/src/activation_kernels.cu
@ -10,8 +10,8 @@ extern "C" {
 __device__ float lhtan_activate_kernel(float x)
 {
-    if(x < 0) return .001*x;
+    if(x < 0) return .001f*x;
-    if(x > 1) return .001*(x-1) + 1;
+    if(x > 1) return .001f*(x-1.f) + 1.f;
    return x;
 }
 __device__ float lhtan_gradient_kernel(float x)
@ -27,25 +27,25 @@ __device__ float hardtan_activate_kernel(float x)
    return x;
 }
 __device__ float linear_activate_kernel(float x){return x;}
-__device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
+__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
-__device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;}
+__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
 __device__ float relu_activate_kernel(float x){return x*(x>0);}
-__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
+__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
-__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01*x;}
+__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
-__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
+__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
-__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
+__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
-__device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);}
+__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
 __device__ float plse_activate_kernel(float x)
 {
-    if(x < -4) return .01 * (x + 4);
+    if(x < -4) return .01f * (x + 4);
-    if(x > 4)  return .01 * (x - 4) + 1;
+    if(x > 4)  return .01f * (x - 4) + 1;
-    return .125*x + .5;
+    return .125f*x + .5f;
 }
 __device__ float stair_activate_kernel(float x)
 {
-    int n = floor(x);
+    int n = floorf(x);
-    if (n%2 == 0) return floor(x/2.);
+    if (n%2 == 0) return floorf(x/2);
-    else return (x - n) + floor(x/2.);
+    else return (x - n) + floorf(x/2);
 }
@ -58,19 +58,19 @@ __device__ float linear_gradient_kernel(float x){return 1;}
 __device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
 __device__ float loggy_gradient_kernel(float x)
 {
-    float y = (x+1.)/2.;
+    float y = (x+1)/2;
    return 2*(1-y)*y;
 }
 __device__ float relu_gradient_kernel(float x){return (x>0);}
 __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
-__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;}
+__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
-__device__ float ramp_gradient_kernel(float x){return (x>0)+.1;}
+__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;}
-__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;}
+__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;}
 __device__ float tanh_gradient_kernel(float x){return 1-x*x;}
-__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01 : .125;}
+__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;}
 __device__ float stair_gradient_kernel(float x)
 {
-    if (floor(x) == x) return 0;
+    if (floorf(x) == x) return 0;
    return 1;
 }
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@ -165,7 +165,7 @@ __global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;
-    x[index] = x[index] + (rate * sqrt(1.-pow(B2, t)) / (1.-pow(B1, t)) * m[index] / (sqrt(v[index]) + eps));
+    x[index] = x[index] + (rate * sqrtf(1.f-powf(B2, t)) / (1.f-powf(B1, t)) * m[index] / (sqrtf(v[index]) + eps));
 }
 extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
@ -194,7 +194,7 @@ __global__ void normalize_kernel(int N, float *x, float *mean, float *variance,
    if (index >= N) return;
    int f = (index/spatial)%filters;
-    x[index] = (x[index] - mean[f])/(sqrt(variance[f] + .00001f));
+    x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f));
 }
 __global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
@ -203,7 +203,7 @@ __global__ void normalize_delta_kernel(int N, float *x, float *mean, float *vari
    if (index >= N) return;
    int f = (index/spatial)%filters;
-    delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
+    delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
 }
 extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
@ -225,7 +225,7 @@ __global__ void  variance_delta_kernel(float *x, float *delta, float *mean, floa
            variance_delta[i] += delta[index]*(x[index] - mean[i]);
        }
    }
-    variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.));
+    variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f));
 }
 __global__ void accumulate_kernel(float *x, int n, int groups, float *sum)
@ -264,7 +264,7 @@ __global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch,
        for(i = 0; i < threads; ++i){
            mean_delta[filter] += local[i];
        }
-        mean_delta[filter] *= (-1./sqrt(variance[filter] + .00001f));
+        mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f));
    }
 }
@ -294,7 +294,7 @@ __global__ void  fast_variance_delta_kernel(float *x, float *delta, float *mean,
        for(i = 0; i < threads; ++i){
            variance_delta[filter] += local[i];
        }
-        variance_delta[filter] *= -.5 * pow(variance[filter] + .00001f, (float)(-3./2.));
+        variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f));
    }
 }
@ -311,7 +311,7 @@ __global__ void mean_delta_kernel(float *delta, float *variance, int batch, int
            mean_delta[i] += delta[index];
        }
    }
-    mean_delta[i] *= (-1./sqrt(variance[i] + .00001f));
+    mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f));
 }
 extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
@ -334,7 +334,7 @@ extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, flo
 __global__ void  mean_kernel(float *x, int batch, int filters, int spatial, float *mean)
 {
-    float scale = 1./(batch * spatial);
+    float scale = 1.f/(batch * spatial);
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    int j,k;
@ -350,7 +350,7 @@ __global__ void  mean_kernel(float *x, int batch, int filters, int spatial, floa
 __global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance)
 {
-    float scale = 1./(batch * spatial - 1);
+    float scale = 1.f/(batch * spatial - 1);
    int j,k;
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
@ -358,7 +358,7 @@ __global__ void variance_kernel(float *x, float *mean, int batch, int filters, i
    for(j = 0; j < batch; ++j){
        for(k = 0; k < spatial; ++k){
            int index = j*filters*spatial + i*spatial + k;
-            variance[i] += pow((x[index] - mean[i]), 2);
+            variance[i] += powf((x[index] - mean[i]), 2);
        }
    }
    variance[i] *= scale;
@ -516,7 +516,7 @@ __global__ void  fast_variance_kernel(float *x, float *mean, int batch, int filt
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;
-            local[id] += (i+id < spatial) ? pow((x[index] - mean[filter]), 2) : 0;
+            local[id] += (i+id < spatial) ? powf((x[index] - mean[filter]), 2) : 0;
        }
    }
@ -716,7 +716,7 @@ __global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta,
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
-        float abs_val = abs(diff);
+        float abs_val = fabsf(diff);
        if(abs_val < 1) {
            error[i] = diff * diff;
            delta[i] = diff;
@ -864,7 +864,7 @@ __device__ void softmax_device(float *input, int n, float temp, int stride, floa
        largest = (val>largest) ? val : largest;
    }
    for(i = 0; i < n; ++i){
-        float e = exp(input[i*stride]/temp - largest/temp);
+        float e = expf(input[i*stride]/temp - largest/temp);
        sum += e;
        output[i*stride] = e;
    }
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@ -33,7 +33,7 @@ __global__ void binarize_input_kernel(float *input, int n, int size, float *bina
    int i = 0;
    float mean = 0;
    for(i = 0; i < n; ++i){
-        mean += abs(input[i*size + s]);
+        mean += fabsf(input[i*size + s]);
    }
    mean = mean / n;
    for(i = 0; i < n; ++i){
@ -55,7 +55,7 @@ __global__ void binarize_weights_kernel(float *weights, int n, int size, float *
    int i = 0;
    float mean = 0;
    for(i = 0; i < size; ++i){
-        mean += abs(weights[f*size + i]);
+        mean += fabsf(weights[f*size + i]);
    }
    mean = mean / size;
    for(i = 0; i < size; ++i){
@ -139,8 +139,8 @@ __global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, fl
    id /= c;
    int b = id;
-    int w_offset = -(size/2.);
+    int w_offset = -(size/2.f);
-    int h_offset = -(size/2.);
+    int h_offset = -(size/2.f);
    int out_index = j + w*(i + h*(k + c*b));
    int l, m;
--- a/src/crop_layer_kernels.cu
+++ b/src/crop_layer_kernels.cu
@ -113,9 +113,9 @@ __global__ void levels_image_kernel(float *image, float *rand, int batch, int w,
    float r3 = rand[8*id + 3];
    saturation = r0*(saturation - 1) + 1;
-    saturation = (r1 > .5) ? 1./saturation : saturation;
+    saturation = (r1 > .5f) ? 1.f/saturation : saturation;
    exposure = r2*(exposure - 1) + 1;
-    exposure = (r3 > .5) ? 1./exposure : exposure;
+    exposure = (r3 > .5f) ? 1.f/exposure : exposure;
    size_t offset = id * h * w * 3;
    image += offset;
@ -131,9 +131,9 @@ __global__ void levels_image_kernel(float *image, float *rand, int batch, int w,
    } else {
        shift = 0;
    }
-    image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5)*shift;
+    image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift;
-    image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5)*shift;
+    image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift;
-    image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5)*shift;
+    image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift;
 }
 __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output)
@ -141,8 +141,8 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= size) return;
-    float cx = w/2.;
+    float cx = w/2.f;
-    float cy = h/2.;
+    float cy = h/2.f;
    int count = id;
    int j = id % crop_width;
@ -160,11 +160,11 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i
    float dw = (w - crop_width)*r4;
    float dh = (h - crop_height)*r5;
-    flip = (flip && (r6 > .5));
+    flip = (flip && (r6 > .5f));
    angle = 2*angle*r7 - angle;
    if(!train){
-        dw = (w - crop_width)/2.;
+        dw = (w - crop_width)/2.f;
-        dh = (h - crop_height)/2.;
+        dh = (h - crop_height)/2.f;
        flip = 0;
        angle = 0;
    }
@ -174,8 +174,8 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i
    float x = (flip) ? w - dw - j - 1 : j + dw;    
    float y = i + dh;
-    float rx = cos(angle)*(x-cx) - sin(angle)*(y-cy) + cx;
+    float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx;
-    float ry = sin(angle)*(x-cx) + cos(angle)*(y-cy) + cy;
+    float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy;
    output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k);
 }
@ -184,7 +184,7 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network net)
 {
    cuda_random(layer.rand_gpu, layer.batch*8);
-    float radians = layer.angle*3.14159265/180.;
+    float radians = layer.angle*3.14159265f/180.f;
    float scale = 2;
    float translate = -1;
--- a/src/data.c
+++ b/src/data.c
@ -137,14 +137,18 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,
 box_label *read_boxes(char *filename, int *n)
 {
-    box_label *boxes = calloc(1, sizeof(box_label));
    FILE *file = fopen(filename, "r");
    if(!file) file_error(filename);
    float x, y, h, w;
    int id;
    int count = 0;
+    int size = 64;
+    box_label *boxes = calloc(size, sizeof(box_label));
    while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
-        boxes = realloc(boxes, (count+1)*sizeof(box_label));
+        if(count == size) {
+            size = size * 2;
+            boxes = realloc(boxes, size*sizeof(box_label));
+        }
        boxes[count].id = id;
        boxes[count].x = x;
        boxes[count].y = y;
@ -976,6 +980,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
        place_image(orig, nw, nh, dx, dy, sized);
        random_distort_image(sized, hue, saturation, exposure);
        int flip = rand()%2;
        if(flip) flip_image(sized);
        d.X.vals[i] = sized.data;
--- a/src/demo.c
+++ b/src/demo.c
@ -20,10 +20,6 @@ static int demo_classes;
 static float **probs;
 static box *boxes;
 static network net;
-static network net2;
-static float **probs2;
-static box *boxes2;
-static float **predictions2;
 static image buff [3];
 static image buff_letter[3];
 static int buff_index = 0;
--- a/src/image.c
+++ b/src/image.c
@ -44,6 +44,51 @@ image mask_to_rgb(image mask)
    return im;
 }
 /*
  * Return the value stored at column x, row y of channel c.
  * The buffer is indexed as c*h*w + y*w + x.
  * The assert only checks the upper bounds; negative coordinates are not
  * caught here.
  */
 static float get_pixel(image m, int x, int y, int c)
 {
    assert(x < m.w && y < m.h && c < m.c);
    return m.data[c*m.h*m.w + y*m.w + x];
 }
 /*
  * Bounds-tolerant pixel read: returns 0 when x, y or c lies outside the
  * image, otherwise forwards to get_pixel().
  */
 static float get_pixel_extend(image m, int x, int y, int c)
 {
    if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0;
    // Disabled alternative: clamp x and y to the nearest edge instead of returning 0.
    /*
    if(x < 0) x = 0;
    if(x >= m.w) x = m.w-1;
    if(y < 0) y = 0;
    if(y >= m.h) y = m.h-1;
    */
    if(c < 0 || c >= m.c) return 0;
    return get_pixel(m, x, y, c);
 }
 /*
  * Store val at column x, row y of channel c.
  * Coordinates that are negative or >= the matching dimension are silently
  * ignored (the function returns without writing).
  */
 static void set_pixel(image m, int x, int y, int c, float val)
 {
    if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return;
    // Always true after the guard above; kept as a sanity check.
    assert(x < m.w && y < m.h && c < m.c);
    m.data[c*m.h*m.w + y*m.w + x] = val;
 }
 /*
  * Add val to the value stored at column x, row y of channel c.
  * Only the upper bounds are asserted; negative coordinates are not checked.
  */
 static void add_pixel(image m, int x, int y, int c, float val)
 {
    assert(x < m.w && y < m.h && c < m.c);
    m.data[c*m.h*m.w + y*m.w + x] += val;
 }
 /*
  * Sample channel c at the fractional position (x, y) by blending the four
  * pixels at (ix, iy), (ix, iy+1), (ix+1, iy) and (ix+1, iy+1), where
  * ix = floor(x) and iy = floor(y).
  * Neighbours that fall outside the image contribute 0, because they are read
  * through get_pixel_extend().
  */
 static float bilinear_interpolate(image im, float x, float y, int c)
 {
    int ix = (int) floorf(x);
    int iy = (int) floorf(y);
    // Fractional offsets of (x, y) from the (ix, iy) neighbour; used as blend weights.
    float dx = x - ix;
    float dy = y - iy;
    float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + 
        dy     * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + 
        (1-dy) *   dx   * get_pixel_extend(im, ix+1, iy, c) +
        dy     *   dx   * get_pixel_extend(im, ix+1, iy+1, c);
    return val;
 }
 void composite_image(image source, image dest, int dx, int dy)
 {
    int x,y,k;
@ -1255,21 +1300,6 @@ void saturate_exposure_image(image im, float sat, float exposure)
    constrain_image(im);
 }
-float bilinear_interpolate(image im, float x, float y, int c)
-{
-    int ix = (int) floorf(x);
-    int iy = (int) floorf(y);
-    float dx = x - ix;
-    float dy = y - iy;
-    float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + 
-        dy     * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + 
-        (1-dy) *   dx   * get_pixel_extend(im, ix+1, iy, c) +
-        dy     *   dx   * get_pixel_extend(im, ix+1, iy+1, c);
-    return val;
-}
 image resize_image(image im, int w, int h)
 {
    image resized = make_image(w, h, im.c);   
@ -1419,36 +1449,6 @@ image get_image_layer(image m, int l)
    }
    return out;
 }
-float get_pixel(image m, int x, int y, int c)
-{
-    assert(x < m.w && y < m.h && c < m.c);
-    return m.data[c*m.h*m.w + y*m.w + x];
-}
-float get_pixel_extend(image m, int x, int y, int c)
-{
-    if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0;
-    /*
-    if(x < 0) x = 0;
-    if(x >= m.w) x = m.w-1;
-    if(y < 0) y = 0;
-    if(y >= m.h) y = m.h-1;
-    */
-    if(c < 0 || c >= m.c) return 0;
-    return get_pixel(m, x, y, c);
-}
-void set_pixel(image m, int x, int y, int c, float val)
-{
-    if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return;
-    assert(x < m.w && y < m.h && c < m.c);
-    m.data[c*m.h*m.w + y*m.w + x] = val;
-}
-void add_pixel(image m, int x, int y, int c, float val)
-{
-    assert(x < m.w && y < m.h && c < m.c);
-    m.data[c*m.h*m.w + y*m.w + x] += val;
-}
 void print_image(image m)
 {
    int i, j, k;
--- a/src/image.h
+++ b/src/image.h
@ -60,12 +60,6 @@ void print_image(image m);
 image make_empty_image(int w, int h, int c);
 void copy_image_into(image src, image dest);
-float get_pixel(image m, int x, int y, int c);
-float get_pixel_extend(image m, int x, int y, int c);
-void set_pixel(image m, int x, int y, int c, float val);
-void add_pixel(image m, int x, int y, int c, float val);
-float bilinear_interpolate(image im, float x, float y, int c);
 image get_image_layer(image m, int l);
 #endif
--- a/src/utils.h
+++ b/src/utils.h
@ -5,7 +5,14 @@
 #include "darknet.h"
 #include "list.h"
-#define TWO_PI 6.2831853071795864769252866
+#define TIME(a) \
    do { \
    double start = what_time_is_it_now(); /* declared inside the do-block, so scoped to this expansion */ \
    a; \
    printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \
    } while (0) /* TIME(a): runs `a` once, then prints its source text and the what_time_is_it_now() difference measured around it */
+#define TWO_PI 6.2831853071795864769252866f
 double what_time_is_it_now();
 void shuffle(void *arr, size_t n, size_t size);