From 62b781af4d01fc5f074407590cf556b36c70e837 Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Mon, 2 Oct 2017 15:17:48 -0700 Subject: [PATCH] resnet that works --- Makefile | 4 +- cfg/msr_34.cfg | 366 ------------------- cfg/msr_50.cfg | 558 ----------------------------- cfg/{msr_152.cfg => resnet152.cfg} | 353 ++++++------------ examples/classifier.c | 3 +- examples/darknet.c | 49 +-- examples/detector.c | 20 +- examples/rnn.c | 30 ++ examples/segmenter.c | 2 +- src/activation_kernels.cu | 42 +-- src/blas_kernels.cu | 26 +- src/convolutional_kernels.cu | 8 +- src/crop_layer_kernels.cu | 26 +- src/data.c | 9 +- src/demo.c | 4 - src/image.c | 90 ++--- src/image.h | 6 - src/utils.h | 9 +- 18 files changed, 294 insertions(+), 1311 deletions(-) delete mode 100644 cfg/msr_34.cfg delete mode 100644 cfg/msr_50.cfg rename cfg/{msr_152.cfg => resnet152.cfg} (92%) diff --git a/Makefile b/Makefile index b0a9f1ce..b61f1c60 100644 --- a/Makefile +++ b/Makefile @@ -4,11 +4,11 @@ OPENCV=0 OPENMP=0 DEBUG=0 -ARCH= -gencode arch=compute_20,code=[sm_20,sm_21] \ - -gencode arch=compute_30,code=sm_30 \ +ARCH= -gencode arch=compute_30,code=sm_30 \ -gencode arch=compute_35,code=sm_35 \ -gencode arch=compute_50,code=[sm_50,compute_50] \ -gencode arch=compute_52,code=[sm_52,compute_52] +# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? # This is what I use, uncomment if you know your arch and want to specify # ARCH= -gencode arch=compute_52,code=compute_52 diff --git a/cfg/msr_34.cfg b/cfg/msr_34.cfg deleted file mode 100644 index 5ae23cf5..00000000 --- a/cfg/msr_34.cfg +++ /dev/null @@ -1,366 +0,0 @@ -[net] -batch=128 -subdivisions=1 -height=256 -width=256 -channels=3 -momentum=0.9 -decay=0.0005 - -learning_rate=0.1 -policy=poly -power=4 -max_batches=500000 - -[crop] -crop_height=224 -crop_width=224 -flip=1 -saturation=1 -exposure=1 -angle=0 - -[convolutional] -batch_normalize=1 -filters=64 -size=7 -stride=2 -pad=1 -activation=leaky - -[maxpool] -size=3 -stride=2 - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - - - - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - - - - - - -[convolutional] -batch_normalize=1 -filters=256 
-size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - - - - - - - - - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from = -3 - -[avgpool] - -[connected] -output=1000 -activation=leaky - -[softmax] -groups=1 - -[cost] -type=sse - diff --git a/cfg/msr_50.cfg b/cfg/msr_50.cfg deleted file mode 100644 index 2edd21c1..00000000 --- a/cfg/msr_50.cfg +++ /dev/null @@ -1,558 +0,0 @@ -[net] -batch=128 -subdivisions=8 -height=256 -width=256 -channels=3 -momentum=0.9 -decay=0.0001 - -learning_rate=0.05 -policy=poly -power=4 -max_batches=500000 - - - -[crop] -crop_height=224 -crop_width=224 -flip=1 -saturation=1 -exposure=1 -angle=0 - -##### Conv 1 ##### - -[convolutional] -batch_normalize=1 -filters=64 -size=7 -stride=2 -pad=1 -activation=leaky - -[maxpool] -size=3 -stride=2 - - -##### Conv 2_x ##### - - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=linear - -[route] -layers=-4 - -[convolutional] -batch_normalize=1 -size=1 -stride=1 -pad=1 -activation=linear -filters=256 - -[shortcut] -from = -3 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 
-filters=256 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - - -##### Conv 3_x ##### - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=linear - -[route] -layers=-4 - -[convolutional] -batch_normalize=1 -size=1 -stride=2 -pad=1 -activation=linear -filters=512 - -[shortcut] -from = -3 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - - -##### Conv 4_x ##### - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=linear - -[route] -layers=-4 - -[convolutional] -batch_normalize=1 -size=1 -stride=2 -pad=1 -activation=linear -filters=1024 - -[shortcut] -from = -3 -activation=leaky - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 
-filters=1024 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - - -##### Conv 5_x ##### - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=2048 -size=1 -stride=1 -pad=1 -activation=linear - - -[route] -layers=-4 - -[convolutional] -batch_normalize=1 -size=1 -stride=2 -pad=1 -activation=linear -filters=2048 - -[shortcut] -from = -3 -activation=leaky - - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=2048 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=2048 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 -activation=leaky - -[avgpool] - -[connected] -output=1000 -activation=leaky - -[softmax] -groups=1 - -[cost] -type=sse - diff --git a/cfg/msr_152.cfg b/cfg/resnet152.cfg similarity index 92% rename from cfg/msr_152.cfg rename to cfg/resnet152.cfg index b19c999d..d5fe9094 100644 --- a/cfg/msr_152.cfg +++ b/cfg/resnet152.cfg @@ -1,26 +1,30 @@ [net] -batch=128 -subdivisions=8 +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + height=256 width=256 +max_crop=448 channels=3 momentum=0.9 -decay=0.0001 +decay=0.0005 +burn_in=1000 learning_rate=0.1 policy=poly power=4 -max_batches=500000 +max_batches=1600000 -[crop] -crop_height=224 -crop_width=224 -flip=1 -saturation=1 -exposure=1 -angle=0 - -##### Conv 1 ##### +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 [convolutional] batch_normalize=1 @@ -31,13 +35,9 @@ pad=1 activation=leaky [maxpool] -size=3 +size=2 stride=2 - -##### Conv 2_x ##### - - [convolutional] batch_normalize=1 filters=64 @@ -62,19 +62,8 @@ stride=1 pad=1 activation=linear -[route] -layers=-4 - -[convolutional] -batch_normalize=1 -size=1 -stride=1 -pad=1 -activation=linear -filters=256 - [shortcut] -from = -3 +from=-4 activation=leaky [convolutional] @@ -102,8 +91,7 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky [convolutional] @@ -131,13 +119,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - -##### Conv 3_x ##### - [convolutional] batch_normalize=1 filters=128 @@ -162,23 +146,10 @@ stride=1 pad=1 activation=linear - -[route] -layers=-4 - -[convolutional] -batch_normalize=1 -size=1 -stride=2 -pad=1 -activation=linear -filters=512 - [shortcut] -from = -3 +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=128 @@ -204,11 +175,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=128 @@ -234,11 +203,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=128 @@ -264,11 +231,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=128 @@ -294,11 +259,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=128 @@ 
-324,11 +287,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=128 @@ -354,11 +315,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=128 @@ -384,14 +343,11 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - -##### Conv 4_x ##### - +# Conv 4 [convolutional] batch_normalize=1 filters=256 @@ -416,23 +372,10 @@ stride=1 pad=1 activation=linear - -[route] -layers=-4 - -[convolutional] -batch_normalize=1 -size=1 -stride=2 -pad=1 -activation=linear -filters=1024 - [shortcut] -from = -3 +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -458,11 +401,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -488,11 +429,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -518,11 +457,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -548,11 +485,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -578,11 +513,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -608,11 +541,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -638,11 +569,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -668,11 +597,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -698,11 +625,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -728,11 +653,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -758,11 +681,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -788,11 +709,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -818,11 +737,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -848,11 +765,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -878,11 +793,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -908,11 +821,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -938,11 +849,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -968,11 +877,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -998,11 +905,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1028,11 +933,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - 
[convolutional] batch_normalize=1 filters=256 @@ -1058,11 +961,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1088,11 +989,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1118,11 +1017,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1148,11 +1045,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1178,11 +1073,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1208,11 +1101,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1238,11 +1129,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1268,11 +1157,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1298,11 +1185,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1328,11 +1213,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1358,11 +1241,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1388,11 +1269,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1418,11 +1297,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1448,11 +1325,9 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - [convolutional] batch_normalize=1 filters=256 @@ -1478,13 +1353,10 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky - -##### Conv 5_x ##### - +#Conv 5 [convolutional] batch_normalize=1 filters=512 @@ -1509,50 +1381,8 @@ stride=1 pad=1 activation=linear - -[route] -layers=-4 - -[convolutional] -batch_normalize=1 -size=1 -stride=2 -pad=1 -activation=linear -filters=2048 - [shortcut] -from = -3 -activation=leaky - - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=2048 -size=1 -stride=1 -pad=1 -activation=linear - -[shortcut] -from = -4 - +from=-4 activation=leaky [convolutional] @@ -1580,16 +1410,51 @@ pad=1 activation=linear [shortcut] -from = -4 - +from=-4 activation=leaky +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + [avgpool] -[connected] -output=1000 -activation=leaky - [softmax] groups=1 diff --git a/examples/classifier.c b/examples/classifier.c index 593b34e5..20202c8c 100644 --- a/examples/classifier.c +++ 
b/examples/classifier.c @@ -58,7 +58,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, load_args args = {0}; args.w = net.w; args.h = net.h; - args.threads = 64; + args.threads = 32; args.hierarchy = net.hierarchy; args.min = net.min_crop; @@ -123,6 +123,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, char buff[256]; sprintf(buff, "%s/%s.weights", backup_directory, base); save_weights(net, buff); + pthread_join(load_thread, 0); free_network(net); free_ptrs((void**)labels, classes); diff --git a/examples/darknet.c b/examples/darknet.c index e63afef8..9ead608c 100644 --- a/examples/darknet.c +++ b/examples/darknet.c @@ -83,27 +83,8 @@ void average(int argc, char *argv[]) save_weights(sum, outfile); } -void speed(char *cfgfile, int tics) +long numops(network net) { - if (tics == 0) tics = 1000; - network net = parse_network_cfg(cfgfile); - set_batch_network(&net, 1); - int i; - double time=what_time_is_it_now(); - image im = make_image(net.w, net.h, net.c*net.batch); - for(i = 0; i < tics; ++i){ - network_predict(net, im.data); - } - double t = what_time_is_it_now() - time; - printf("\n%d evals, %f Seconds\n", tics, t); - printf("Speed: %f sec/eval\n", t/tics); - printf("Speed: %f Hz\n", tics/t); -} - -void operations(char *cfgfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); int i; long ops = 0; for(i = 0; i < net.n; ++i){ @@ -134,6 +115,34 @@ void operations(char *cfgfile) ops += 2l * l.wo->inputs * l.wo->outputs; } } + return ops; +} + +void speed(char *cfgfile, int tics) +{ + if (tics == 0) tics = 1000; + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + int i; + double time=what_time_is_it_now(); + image im = make_image(net.w, net.h, net.c*net.batch); + for(i = 0; i < tics; ++i){ + network_predict(net, im.data); + } + double t = what_time_is_it_now() - time; + long ops = numops(net); + printf("\n%d evals, %f Seconds\n", tics, t); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); + printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t); + printf("Speed: %f sec/eval\n", t/tics); + printf("Speed: %f Hz\n", tics/t); +} + +void operations(char *cfgfile) +{ + gpu_index = -1; + network net = parse_network_cfg(cfgfile); + long ops = numops(net); printf("Floating Point Operations: %ld\n", ops); printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); } diff --git a/examples/detector.c b/examples/detector.c index 0537588c..4709b898 100644 --- a/examples/detector.c +++ b/examples/detector.c @@ -52,10 +52,10 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i args.d = &buffer; args.type = DETECTION_DATA; //args.type = INSTANCE_DATA; - args.threads = 8; + args.threads = 64; pthread_t load_thread = load_data(args); - clock_t time; + double time; int count = 0; //while(i*imgs < N*120){ while(get_current_batch(net) < net.max_batches){ @@ -78,7 +78,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i } net = nets[0]; } - time=clock(); + time=what_time_is_it_now(); pthread_join(load_thread, 0); train = buffer; load_thread = load_data(args); @@ -107,9 +107,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i } */ - printf("Loaded: %lf seconds\n", sec(clock()-time)); + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); - time=clock(); + time=what_time_is_it_now(); float loss = 0; #ifdef GPU if(ngpus == 1){ @@ -124,7 +124,7 @@ void 
train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i avg_loss = avg_loss*.9 + loss*.1; i = get_current_batch(net); - printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs); if(i%100==0){ #ifdef GPU if(ngpus != 1) sync_nets(nets, ngpus, 0); @@ -313,7 +313,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char args.resized = &buf_resized[t]; thr[t] = load_data_in_thread(args); } - time_t start = time(0); + double start = what_time_is_it_now(); for(i = nthreads; i < m+nthreads; i += nthreads){ fprintf(stderr, "%d\n", i); for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ @@ -359,7 +359,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char fprintf(fp, "\n]\n"); fclose(fp); } - fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); } @@ -447,7 +447,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out args.resized = &buf_resized[t]; thr[t] = load_data_in_thread(args); } - time_t start = time(0); + double start = what_time_is_it_now(); for(i = nthreads; i < m+nthreads; i += nthreads){ fprintf(stderr, "%d\n", i); for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ @@ -490,7 +490,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out fprintf(fp, "\n]\n"); fclose(fp); } - fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); } void validate_detector_recall(char *cfgfile, char *weightfile) diff --git a/examples/rnn.c b/examples/rnn.c index 45e8deba..8169f510 100644 --- a/examples/rnn.c +++ b/examples/rnn.c @@ -52,6 +52,7 @@ char **read_tokens(char *filename, size_t *read) return d; } + float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps) { float *x = calloc(batch * steps * characters, sizeof(float)); @@ -78,6 +79,35 @@ float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size return p; } +float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps) +{ + int i,j; + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + for(i = 0; i < batch; ++i){ + int index = rand()%n; + for(j = 0; j < steps; ++j){ + unsigned char curr = source[index][j]; + unsigned char next = dest[index][j]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps) { float *x = calloc(batch * steps * characters, sizeof(float)); diff --git a/examples/segmenter.c b/examples/segmenter.c index 2c1979d4..137ecd8e 100644 --- a/examples/segmenter.c +++ b/examples/segmenter.c @@ 
-211,7 +211,7 @@ void demo_segmenter(char *datacfg, char *cfgfile, char *weightfile, int cam_inde image in = get_image_from_stream(cap); image in_s = letterbox_image(in, net.w, net.h); - float *predictions = network_predict(net, in_s.data); + network_predict(net, in_s.data); printf("\033[2J"); printf("\033[1;1H"); diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 73530056..80a849f7 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -10,8 +10,8 @@ extern "C" { __device__ float lhtan_activate_kernel(float x) { - if(x < 0) return .001*x; - if(x > 1) return .001*(x-1) + 1; + if(x < 0) return .001f*x; + if(x > 1) return .001f*(x-1.f) + 1.f; return x; } __device__ float lhtan_gradient_kernel(float x) @@ -27,25 +27,25 @@ __device__ float hardtan_activate_kernel(float x) return x; } __device__ float linear_activate_kernel(float x){return x;} -__device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));} -__device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;} +__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} +__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} __device__ float relu_activate_kernel(float x){return x*(x>0);} -__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} -__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01*x;} -__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;} -__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;} -__device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);} +__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} +__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} +__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} +__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;} +__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} __device__ float plse_activate_kernel(float x) { - if(x < -4) return .01 * (x + 4); - if(x > 4) return .01 * (x - 4) + 1; - return .125*x + .5; + if(x < -4) return .01f * (x + 4); + if(x > 4) return .01f * (x - 4) + 1; + return .125f*x + .5f; } __device__ float stair_activate_kernel(float x) { - int n = floor(x); - if (n%2 == 0) return floor(x/2.); - else return (x - n) + floor(x/2.); + int n = floorf(x); + if (n%2 == 0) return floorf(x/2); + else return (x - n) + floorf(x/2); } @@ -58,19 +58,19 @@ __device__ float linear_gradient_kernel(float x){return 1;} __device__ float logistic_gradient_kernel(float x){return (1-x)*x;} __device__ float loggy_gradient_kernel(float x) { - float y = (x+1.)/2.; + float y = (x+1)/2; return 2*(1-y)*y; } __device__ float relu_gradient_kernel(float x){return (x>0);} __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} -__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;} -__device__ float ramp_gradient_kernel(float x){return (x>0)+.1;} -__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;} +__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} +__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} +__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} __device__ float tanh_gradient_kernel(float x){return 1-x*x;} -__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01 : .125;} +__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;} __device__ float stair_gradient_kernel(float x) { - if (floor(x) == x) return 0; + if (floorf(x) == x) return 0; return 1; } diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 867db038..a483f2eb 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -165,7 +165,7 @@ __global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (index >= N) return; - x[index] = x[index] + (rate * sqrt(1.-pow(B2, t)) / (1.-pow(B1, t)) * m[index] / (sqrt(v[index]) + eps)); + x[index] = x[index] + (rate * sqrtf(1.f-powf(B2, t)) / (1.f-powf(B1, t)) * m[index] / (sqrtf(v[index]) + eps)); } extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) @@ -194,7 +194,7 @@ __global__ void normalize_kernel(int N, float *x, float *mean, float *variance, if (index >= N) return; int f = (index/spatial)%filters; - x[index] = (x[index] - mean[f])/(sqrt(variance[f] + .00001f)); + x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f)); } __global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) @@ -203,7 +203,7 @@ __global__ void normalize_delta_kernel(int N, float *x, float *mean, float *vari if (index >= N) return; int f = (index/spatial)%filters; - delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); + delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); } extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) @@ -225,7 +225,7 @@ __global__ void variance_delta_kernel(float *x, float *delta, float *mean, floa variance_delta[i] += delta[index]*(x[index] - mean[i]); } } - variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); + variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f)); } __global__ void accumulate_kernel(float *x, int n, int groups, float *sum) @@ -264,7 +264,7 @@ __global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, for(i = 0; i < threads; ++i){ mean_delta[filter] += local[i]; } - mean_delta[filter] *= (-1./sqrt(variance[filter] + .00001f)); + mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f)); } } @@ -294,7 +294,7 @@ __global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, for(i = 0; i < threads; ++i){ variance_delta[filter] += local[i]; } - variance_delta[filter] *= -.5 * pow(variance[filter] + .00001f, (float)(-3./2.)); + variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f)); } } @@ -311,7 +311,7 @@ __global__ void mean_delta_kernel(float *delta, float *variance, int batch, int mean_delta[i] += delta[index]; } } - mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); + mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f)); } extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) @@ -334,7 +334,7 @@ extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, flo __global__ void 
mean_kernel(float *x, int batch, int filters, int spatial, float *mean) { - float scale = 1./(batch * spatial); + float scale = 1.f/(batch * spatial); int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (i >= filters) return; int j,k; @@ -350,7 +350,7 @@ __global__ void mean_kernel(float *x, int batch, int filters, int spatial, floa __global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) { - float scale = 1./(batch * spatial - 1); + float scale = 1.f/(batch * spatial - 1); int j,k; int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (i >= filters) return; @@ -358,7 +358,7 @@ __global__ void variance_kernel(float *x, float *mean, int batch, int filters, i for(j = 0; j < batch; ++j){ for(k = 0; k < spatial; ++k){ int index = j*filters*spatial + i*spatial + k; - variance[i] += pow((x[index] - mean[i]), 2); + variance[i] += powf((x[index] - mean[i]), 2); } } variance[i] *= scale; @@ -516,7 +516,7 @@ __global__ void fast_variance_kernel(float *x, float *mean, int batch, int filt for(i = 0; i < spatial; i += threads){ int index = j*spatial*filters + filter*spatial + i + id; - local[id] += (i+id < spatial) ? pow((x[index] - mean[filter]), 2) : 0; + local[id] += (i+id < spatial) ? powf((x[index] - mean[filter]), 2) : 0; } } @@ -716,7 +716,7 @@ __global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if(i < n){ float diff = truth[i] - pred[i]; - float abs_val = abs(diff); + float abs_val = fabsf(diff); if(abs_val < 1) { error[i] = diff * diff; delta[i] = diff; @@ -864,7 +864,7 @@ __device__ void softmax_device(float *input, int n, float temp, int stride, floa largest = (val>largest) ? val : largest; } for(i = 0; i < n; ++i){ - float e = exp(input[i*stride]/temp - largest/temp); + float e = expf(input[i*stride]/temp - largest/temp); sum += e; output[i*stride] = e; } diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index b9b6f455..749b4c5e 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -33,7 +33,7 @@ __global__ void binarize_input_kernel(float *input, int n, int size, float *bina int i = 0; float mean = 0; for(i = 0; i < n; ++i){ - mean += abs(input[i*size + s]); + mean += fabsf(input[i*size + s]); } mean = mean / n; for(i = 0; i < n; ++i){ @@ -55,7 +55,7 @@ __global__ void binarize_weights_kernel(float *weights, int n, int size, float * int i = 0; float mean = 0; for(i = 0; i < size; ++i){ - mean += abs(weights[f*size + i]); + mean += fabsf(weights[f*size + i]); } mean = mean / size; for(i = 0; i < size; ++i){ @@ -139,8 +139,8 @@ __global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, fl id /= c; int b = id; - int w_offset = -(size/2.); - int h_offset = -(size/2.); + int w_offset = -(size/2.f); + int h_offset = -(size/2.f); int out_index = j + w*(i + h*(k + c*b)); int l, m; diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu index b6568219..b5b9f554 100644 --- a/src/crop_layer_kernels.cu +++ b/src/crop_layer_kernels.cu @@ -113,9 +113,9 @@ __global__ void levels_image_kernel(float *image, float *rand, int batch, int w, float r3 = rand[8*id + 3]; saturation = r0*(saturation - 1) + 1; - saturation = (r1 > .5) ? 1./saturation : saturation; + saturation = (r1 > .5f) ? 1.f/saturation : saturation; exposure = r2*(exposure - 1) + 1; - exposure = (r3 > .5) ? 1./exposure : exposure; + exposure = (r3 > .5f) ? 
1.f/exposure : exposure; size_t offset = id * h * w * 3; image += offset; @@ -131,9 +131,9 @@ __global__ void levels_image_kernel(float *image, float *rand, int batch, int w, } else { shift = 0; } - image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5)*shift; - image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5)*shift; - image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5)*shift; + image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; + image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; + image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; } __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) @@ -141,8 +141,8 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if(id >= size) return; - float cx = w/2.; - float cy = h/2.; + float cx = w/2.f; + float cy = h/2.f; int count = id; int j = id % crop_width; @@ -160,11 +160,11 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i float dw = (w - crop_width)*r4; float dh = (h - crop_height)*r5; - flip = (flip && (r6 > .5)); + flip = (flip && (r6 > .5f)); angle = 2*angle*r7 - angle; if(!train){ - dw = (w - crop_width)/2.; - dh = (h - crop_height)/2.; + dw = (w - crop_width)/2.f; + dh = (h - crop_height)/2.f; flip = 0; angle = 0; } @@ -174,8 +174,8 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i float x = (flip) ? w - dw - j - 1 : j + dw; float y = i + dh; - float rx = cos(angle)*(x-cx) - sin(angle)*(y-cy) + cx; - float ry = sin(angle)*(x-cx) + cos(angle)*(y-cy) + cy; + float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; + float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); } @@ -184,7 +184,7 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) { cuda_random(layer.rand_gpu, layer.batch*8); - float radians = layer.angle*3.14159265/180.; + float radians = layer.angle*3.14159265f/180.f; float scale = 2; float translate = -1; diff --git a/src/data.c b/src/data.c index e060e937..36b1286a 100644 --- a/src/data.c +++ b/src/data.c @@ -137,14 +137,18 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, box_label *read_boxes(char *filename, int *n) { - box_label *boxes = calloc(1, sizeof(box_label)); FILE *file = fopen(filename, "r"); if(!file) file_error(filename); float x, y, h, w; int id; int count = 0; + int size = 64; + box_label *boxes = calloc(size, sizeof(box_label)); while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ - boxes = realloc(boxes, (count+1)*sizeof(box_label)); + if(count == size) { + size = size * 2; + boxes = realloc(boxes, size*sizeof(box_label)); + } boxes[count].id = id; boxes[count].x = x; boxes[count].y = y; @@ -976,6 +980,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in place_image(orig, nw, nh, dx, dy, sized); random_distort_image(sized, hue, saturation, exposure); + int flip = rand()%2; if(flip) flip_image(sized); d.X.vals[i] = sized.data; diff --git a/src/demo.c b/src/demo.c index a60c456d..28a6ddc6 100644 --- a/src/demo.c +++ b/src/demo.c @@ -20,10 +20,6 @@ static int demo_classes; static float **probs; static box *boxes; static network net; -static network 
net2; -static float **probs2; -static box *boxes2; -static float **predictions2; static image buff [3]; static image buff_letter[3]; static int buff_index = 0; diff --git a/src/image.c b/src/image.c index ece5ff50..fc8d08d1 100644 --- a/src/image.c +++ b/src/image.c @@ -44,6 +44,51 @@ image mask_to_rgb(image mask) return im; } +static float get_pixel(image m, int x, int y, int c) +{ + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; +} +static float get_pixel_extend(image m, int x, int y, int c) +{ + if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; + /* + if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if(c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); +} +static void set_pixel(image m, int x, int y, int c, float val) +{ + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; +} +static void add_pixel(image m, int x, int y, int c, float val) +{ + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; +} + +static float bilinear_interpolate(image im, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + return val; +} + + void composite_image(image source, image dest, int dx, int dy) { int x,y,k; @@ -1255,21 +1300,6 @@ void saturate_exposure_image(image im, float sat, float exposure) constrain_image(im); } -float bilinear_interpolate(image im, float x, float y, int c) -{ - int ix = (int) floorf(x); - int iy = (int) floorf(y); - - float dx = x - ix; - float dy = y - iy; - - float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + - dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + - (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + - dy * dx * get_pixel_extend(im, ix+1, iy+1, c); - return val; -} - image resize_image(image im, int w, int h) { image resized = make_image(w, h, im.c); @@ -1419,36 +1449,6 @@ image get_image_layer(image m, int l) } return out; } - -float get_pixel(image m, int x, int y, int c) -{ - assert(x < m.w && y < m.h && c < m.c); - return m.data[c*m.h*m.w + y*m.w + x]; -} -float get_pixel_extend(image m, int x, int y, int c) -{ - if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; - /* - if(x < 0) x = 0; - if(x >= m.w) x = m.w-1; - if(y < 0) y = 0; - if(y >= m.h) y = m.h-1; - */ - if(c < 0 || c >= m.c) return 0; - return get_pixel(m, x, y, c); -} -void set_pixel(image m, int x, int y, int c, float val) -{ - if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; - assert(x < m.w && y < m.h && c < m.c); - m.data[c*m.h*m.w + y*m.w + x] = val; -} -void add_pixel(image m, int x, int y, int c, float val) -{ - assert(x < m.w && y < m.h && c < m.c); - m.data[c*m.h*m.w + y*m.w + x] += val; -} - void print_image(image m) { int i, j, k; diff --git a/src/image.h b/src/image.h index 02c79f08..4ff0eacb 100644 --- a/src/image.h +++ b/src/image.h @@ -60,12 +60,6 @@ void print_image(image m); image make_empty_image(int w, int h, int c); void copy_image_into(image src, image dest); -float get_pixel(image m, int x, int y, int c); -float get_pixel_extend(image m, int x, int y, int c); -void set_pixel(image m, int x, int y, int c, float val); -void add_pixel(image 
m, int x, int y, int c, float val); -float bilinear_interpolate(image im, float x, float y, int c); - image get_image_layer(image m, int l); #endif diff --git a/src/utils.h b/src/utils.h index 1593e62b..4e467075 100644 --- a/src/utils.h +++ b/src/utils.h @@ -5,7 +5,14 @@ #include "darknet.h" #include "list.h" -#define TWO_PI 6.2831853071795864769252866 +#define TIME(a) \ + do { \ + double start = what_time_is_it_now(); \ + a; \ + printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ + } while (0) + +#define TWO_PI 6.2831853071795864769252866f double what_time_is_it_now(); void shuffle(void *arr, size_t n, size_t size);
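
Two short usage notes follow as end-of-patch commentary; they are not part of the diff above.

The new TIME(a) macro added to src/utils.h wraps a statement with wall-clock timing via what_time_is_it_now() and prints the stringified statement with its elapsed time. A minimal sketch of a call site (the wrapper function below is hypothetical and only for illustration; network_predict() is the existing darknet call used elsewhere in this patch):

    #include "utils.h"   /* pulls in darknet.h and the TIME() macro */

    /* Hypothetical helper, not part of the patch: times one forward pass.
     * TIME() expands to a do { ... } while (0) block, so it acts as a single
     * statement and prints e.g. "network_predict(net, X) took: 0.031 seconds". */
    void time_forward_pass(network net, float *X)
    {
        TIME(network_predict(net, X));
    }

Because the macro body is a do/while(0) block, it can sit anywhere a statement is allowed, including unbraced if/else branches.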
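
The read_boxes() change in src/data.c replaces a realloc of one element per label line (quadratic copying on long label files) with geometric growth: start at a capacity of 64 and double whenever the count reaches the capacity. A self-contained sketch of that amortized-doubling pattern, with illustrative names that are not from darknet and with allocation-error handling omitted as in the original:

    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative only: read floats from a file into a geometrically grown
     * array, mirroring the capacity-doubling used in read_boxes() above. */
    int read_floats(FILE *f, float **out)
    {
        int count = 0, size = 64;                  /* initial capacity, as in read_boxes() */
        float *vals = calloc(size, sizeof(float));
        float v;
        while (fscanf(f, "%f", &v) == 1) {
            if (count == size) {                   /* full: double the capacity */
                size *= 2;
                vals = realloc(vals, size * sizeof(float));
            }
            vals[count++] = v;
        }
        *out = vals;
        return count;
    }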