From 6e7c7976d704be06d2d0de24f46e15a6bba56e18 Mon Sep 17 00:00:00 2001
From: Stefano Sinigardi <stesinigardi@hotmail.com>
Date: Mon, 18 Feb 2019 11:32:04 +0100
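Subject: [PATCH] Move as many definitions as possible out of darknet.h

Relocate macros and typedefs from the public header include/darknet.h
to the files that use them:

* BLOCK, FULL_MASK, WARP_SIZE, BLOCK_TRANSPOSE32 to src/cuda.h
* TILE_M, TILE_N, TILE_K to src/gemm.c
* NUMCHARS to src/data.c
* DOABS to src/region_layer.c
* C_SHARP_MAX_OBJECTS to include/yolo_v2_class.hpp
* the __compar_fn_t / comparison_fn_t typedefs to src/detector.c

The socket-related macros (PORT, SOCKET, HOSTENT, SOCKADDR, ...) and a
duplicate SECRET_NUM definition are dropped from darknet.h.

Unrelated cleanup included in this patch: strip trailing whitespace,
add missing final newlines, remove trailing blank lines, fix a
mis-encoded apostrophe in a src/gemm.c comment, and remove the CMake
section from .gitignore.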
---
 .gitignore                     |  9 ------
 include/darknet.h              | 34 +--------------------
 include/yolo_v2_class.hpp      |  2 ++
 src/activations.c              |  1 -
 src/art.c                      |  3 +-
 src/avgpool_layer.c            |  1 -
 src/avgpool_layer_kernels.cu   |  1 -
 src/batchnorm_layer.c          |  2 +-
 src/blas.c                     |  2 +-
 src/blas_kernels.cu            |  2 +-
 src/box.c                      |  8 ++---
 src/captcha.c                  |  1 -
 src/cifar.c                    |  2 --
 src/classifier.c               |  2 --
 src/col2im.c                   |  3 +-
 src/col2im_kernels.cu          |  1 -
 src/compare.c                  |  4 +--
 src/cost_layer.c               |  1 -
 src/cpu_gemm.c                 | 20 ++++++-------
 src/crop_layer.c               |  5 ++--
 src/crop_layer_kernels.cu      |  1 -
 src/cuda.h                     |  4 +++
 src/darknet.c                  |  1 -
 src/data.c                     |  3 +-
 src/deconvolutional_kernels.cu |  3 +-
 src/deconvolutional_layer.c    |  5 +---
 src/detection_layer.c          |  2 +-
 src/detector.c                 |  8 +++++
 src/dice.c                     |  1 -
 src/dropout_layer.c            |  3 +-
 src/gemm.c                     |  9 ++++--
 src/gettimeofday.c             |  6 ++--
 src/gettimeofday.h             |  3 +-
 src/http_stream.h              |  2 +-
 src/im2col.c                   |  3 +-
 src/im2col_kernels.cu          |  1 -
 src/image.c                    |  2 +-
 src/list.c                     |  2 +-
 src/local_layer.c              | 18 ++++++------
 src/lstm_layer.h               |  4 +--
 src/maxpool_layer.c            |  1 -
 src/maxpool_layer_kernels.cu   |  1 -
 src/network.h                  |  1 -
 src/network_kernels.cu         |  1 -
 src/nightmare.c                |  5 ++--
 src/normalization_layer.c      |  6 ++--
 src/region_layer.c             |  3 +-
 src/rnn_vid.c                  |  1 -
 src/softmax_layer.c            |  6 ++--
 src/stb_image_write.h          | 54 +++++++++++++++++-----------------
 src/tag.c                      |  1 -
 src/tree.c                     |  4 +--
 src/upsample_layer.c           |  2 +-
 src/writing.c                  |  1 -
 src/yolo_layer.c               |  1 -
 55 files changed, 109 insertions(+), 164 deletions(-)

diff --git a/.gitignore b/.gitignore
index da2d106e..f2492274 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,12 +24,3 @@ ehthumbs.db
 Icon?
 Thumbs.db
 *.swp
-
-# CMake #
-cmake-build-debug/
-CMakeLists.txt
-build_*/
-build.*
-cmake/
-*.patch
-.gitignore
diff --git a/include/darknet.h b/include/darknet.h
index e35524de..40683712 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -26,40 +26,10 @@
 #endif
 #endif
 
-#ifdef _WIN32
-#define PORT unsigned long
-#define ADDRPOINTER int*
-#else
-#define PORT unsigned short
-#define SOCKET int
-#define HOSTENT struct hostent
-#define SOCKADDR struct sockaddr
-#define SOCKADDR_IN struct sockaddr_in
-#define ADDRPOINTER unsigned int*
-#define INVALID_SOCKET -1
-#define SOCKET_ERROR -1
-#endif
-#define FULL_MASK 0xffffffff
-#define WARP_SIZE 32
-#define BLOCK 512
-#define NUMCHARS 37
 #define NFRAMES 3
-#define BLOCK_TRANSPOSE32 256
-#define DOABS 1
 #define SECRET_NUM -1234
-#define C_SHARP_MAX_OBJECTS 1000
-#define TILE_M 4 // 4 ops
-#define TILE_N 16 // AVX2 = 2 ops * 8 floats
-#define TILE_K 16 // loop
-#ifndef __COMPAR_FN_T
-#define __COMPAR_FN_T
-typedef int (*__compar_fn_t)(const void*, const void*);
-#ifdef __USE_GNU
-typedef __compar_fn_t comparison_fn_t;
-#endif
-#endif
+
 #ifdef GPU
-#define BLOCK 512
 
 #include "cuda_runtime.h"
 #include "curand.h"
@@ -101,8 +71,6 @@ typedef struct metadata metadata;
 struct tree;
 typedef struct tree tree;
 
-
-#define SECRET_NUM -1234
 extern int gpu_index;
 
 // option_list.h
diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp
index f7f13bda..e687b1e8 100644
--- a/include/yolo_v2_class.hpp
+++ b/include/yolo_v2_class.hpp
@@ -3,6 +3,8 @@
 
 #include "darknet.h"
 
+#define C_SHARP_MAX_OBJECTS 1000
+
 struct bbox_t {
     unsigned int x, y, w, h;    // (x,y) - top-left corner, (w, h) - width & height of bounded box
     float prob;                    // confidence - probability that the object was found correctly
diff --git a/src/activations.c b/src/activations.c
index 7aba7e26..245fffa9 100644
--- a/src/activations.c
+++ b/src/activations.c
@@ -162,4 +162,3 @@ void gradient_array(const float *x, const int n, const ACTIVATION a, float *delt
         delta[i] *= gradient(x[i], a);
     }
 }
-
diff --git a/src/art.c b/src/art.c
index 364cca3d..e4471767 100644
--- a/src/art.c
+++ b/src/art.c
@@ -38,7 +38,7 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)
 
     char *window = "ArtJudgementBot9000!!!";
     if(!cap) error("Couldn't connect to webcam.\n");
-    cvNamedWindow(window, CV_WINDOW_NORMAL); 
+    cvNamedWindow(window, CV_WINDOW_NORMAL);
     cvResizeWindow(window, 512, 512);
     int i;
     int idx[] = {37, 401, 434};
@@ -84,4 +84,3 @@ void run_art(int argc, char **argv)
     char *weights = argv[3];
     demo_art(cfg, weights, cam_index);
 }
-
diff --git a/src/avgpool_layer.c b/src/avgpool_layer.c
index 983fe759..bae5ff4d 100644
--- a/src/avgpool_layer.c
+++ b/src/avgpool_layer.c
@@ -68,4 +68,3 @@ void backward_avgpool_layer(const avgpool_layer l, network_state state)
         }
     }
 }
-
diff --git a/src/avgpool_layer_kernels.cu b/src/avgpool_layer_kernels.cu
index 1bb48ae6..b38ba450 100644
--- a/src/avgpool_layer_kernels.cu
+++ b/src/avgpool_layer_kernels.cu
@@ -56,4 +56,3 @@ extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state st
     backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
-
diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c
index d6ce989e..aed1a1dd 100644
--- a/src/batchnorm_layer.c
+++ b/src/batchnorm_layer.c
@@ -273,4 +273,4 @@ void backward_batchnorm_layer_gpu(layer l, network_state state)
     if (l.type == BATCHNORM) simple_copy_ongpu(l.outputs*l.batch, l.delta_gpu, state.delta);
         //copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/src/blas.c b/src/blas.c
index 32d44cb5..09033e25 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -333,4 +333,4 @@ void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int for
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index 72d92129..45ed09cd 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -974,4 +974,4 @@ extern "C" void softmax_tree_gpu(float *input, int spatial, int batch, int strid
     CHECK_CUDA(cudaPeekAtLastError());
 	cuda_free((float *)tree_groups_size);
 	cuda_free((float *)tree_groups_offset);
-}
\ No newline at end of file
+}
diff --git a/src/box.c b/src/box.c
index bc4958a2..28371362 100644
--- a/src/box.c
+++ b/src/box.c
@@ -98,9 +98,9 @@ float box_iou(box a, box b)
 
 float box_rmse(box a, box b)
 {
-    return sqrt(pow(a.x-b.x, 2) + 
-                pow(a.y-b.y, 2) + 
-                pow(a.w-b.w, 2) + 
+    return sqrt(pow(a.x-b.x, 2) +
+                pow(a.y-b.y, 2) +
+                pow(a.w-b.w, 2) +
                 pow(a.h-b.h, 2));
 }
 
@@ -252,7 +252,7 @@ void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thr
     sortable_bbox* s = (sortable_bbox*)calloc(total, sizeof(sortable_bbox));
 
     for(i = 0; i < total; ++i){
-        s[i].index = i;       
+        s[i].index = i;
         s[i].class_id = 0;
         s[i].probs = probs;
     }
diff --git a/src/captcha.c b/src/captcha.c
index 230c723f..0bb15b84 100644
--- a/src/captcha.c
+++ b/src/captcha.c
@@ -361,4 +361,3 @@ void run_captcha(int argc, char **argv)
     //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights);
     //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights);
 }
-
diff --git a/src/cifar.c b/src/cifar.c
index a9daa979..04dec155 100644
--- a/src/cifar.c
+++ b/src/cifar.c
@@ -273,5 +273,3 @@ void run_cifar(int argc, char **argv)
     else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights);
     else if(0==strcmp(argv[2], "eval")) eval_cifar_csv();
 }
-
-
diff --git a/src/classifier.c b/src/classifier.c
index 24b956d7..ed6951d5 100644
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -1295,5 +1295,3 @@ void run_classifier(int argc, char **argv)
     else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights);
     else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights);
 }
-
-
diff --git a/src/col2im.c b/src/col2im.c
index 43126423..925d054c 100644
--- a/src/col2im.c
+++ b/src/col2im.c
@@ -14,7 +14,7 @@ void col2im_add_pixel(float *im, int height, int width, int channels,
 //This one might be too, can't remember.
 void col2im_cpu(float* data_col,
          int channels,  int height,  int width,
-         int ksize,  int stride, int pad, float* data_im) 
+         int ksize,  int stride, int pad, float* data_im)
 {
     int c,h,w;
     int height_col = (height + 2*pad - ksize) / stride + 1;
@@ -37,4 +37,3 @@ void col2im_cpu(float* data_col,
         }
     }
 }
-
diff --git a/src/col2im_kernels.cu b/src/col2im_kernels.cu
index 34808c00..e3b1d233 100644
--- a/src/col2im_kernels.cu
+++ b/src/col2im_kernels.cu
@@ -55,4 +55,3 @@ void col2im_ongpu(float *data_col,
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
-
diff --git a/src/compare.c b/src/compare.c
index d0d1f1f0..5c1e0451 100644
--- a/src/compare.c
+++ b/src/compare.c
@@ -180,7 +180,7 @@ int bbox_comparator(const void *a, const void *b)
     memcpy(X,                   im1.data, im1.w*im1.h*im1.c*sizeof(float));
     memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
     float *predictions = network_predict(net, X);
-    
+
     free_image(im1);
     free_image(im2);
     free(X);
@@ -218,7 +218,7 @@ void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, in
             bbox_update(a, b, i, result);
         }
     }
-    
+
     free_image(im1);
     free_image(im2);
     free(X);
diff --git a/src/cost_layer.c b/src/cost_layer.c
index 39913d67..33fd8572 100644
--- a/src/cost_layer.c
+++ b/src/cost_layer.c
@@ -145,4 +145,3 @@ void backward_cost_layer_gpu(const cost_layer l, network_state state)
     axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1);
 }
 #endif
-
diff --git a/src/cpu_gemm.c b/src/cpu_gemm.c
index 6a3cd1dc..8305bb53 100644
--- a/src/cpu_gemm.c
+++ b/src/cpu_gemm.c
@@ -1,7 +1,7 @@
 //#include "mini_blas.h"
 
-void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA, 
-        float *A, int lda, 
+void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
+        float *A, int lda,
         float *B, int ldb,
         float BETA,
         float *C, int ldc)
@@ -17,8 +17,8 @@ void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
     }
 }
 
-void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA, 
-        float *A, int lda, 
+void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
+        float *A, int lda,
         float *B, int ldb,
         float BETA,
         float *C, int ldc)
@@ -35,8 +35,8 @@ void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
     }
 }
 
-void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA, 
-        float *A, int lda, 
+void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
+        float *A, int lda,
         float *B, int ldb,
         float BETA,
         float *C, int ldc)
@@ -51,8 +51,8 @@ void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
         }
     }
 }
-void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA, 
-        float *A, int lda, 
+void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA,
+        float *A, int lda,
         float *B, int ldb,
         float BETA,
         float *C, int ldc)
@@ -68,8 +68,8 @@ void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA,
 }
 
 
-void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, 
-        float *A, int lda, 
+void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA,
+        float *A, int lda,
         float *B, int ldb,
         float BETA,
         float *C, int ldc)
diff --git a/src/crop_layer.c b/src/crop_layer.c
index 816f00e1..258030be 100644
--- a/src/crop_layer.c
+++ b/src/crop_layer.c
@@ -88,16 +88,15 @@ void forward_crop_layer(const crop_layer l, network_state state)
             for(i = 0; i < l.out_h; ++i){
                 for(j = 0; j < l.out_w; ++j){
                     if(flip){
-                        col = l.w - dw - j - 1;    
+                        col = l.w - dw - j - 1;
                     }else{
                         col = j + dw;
                     }
                     row = i + dh;
-                    index = col+l.w*(row+l.h*(c + l.c*b)); 
+                    index = col+l.w*(row+l.h*(c + l.c*b));
                     l.output[count++] = state.input[index]*scale + trans;
                 }
             }
         }
     }
 }
-
diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu
index 724196c3..5b084fe5 100644
--- a/src/crop_layer_kernels.cu
+++ b/src/crop_layer_kernels.cu
@@ -220,4 +220,3 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
        cvWaitKey(0);
        */
 }
-
diff --git a/src/cuda.h b/src/cuda.h
index 4010494e..fe33a258 100644
--- a/src/cuda.h
+++ b/src/cuda.h
@@ -13,6 +13,10 @@ extern int gpu_index;
 
 #ifdef GPU
 
+#define BLOCK 512
+#define FULL_MASK 0xffffffff
+#define WARP_SIZE 32
+#define BLOCK_TRANSPOSE32 256
 
 #include <cuda_runtime.h>
 #include <curand.h>
diff --git a/src/darknet.c b/src/darknet.c
index 87175bc3..95761463 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -540,4 +540,3 @@ int main(int argc, char **argv)
     }
     return 0;
 }
-
diff --git a/src/data.c b/src/data.c
index 09b1b2e2..80c08f48 100644
--- a/src/data.c
+++ b/src/data.c
@@ -7,6 +7,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+#define NUMCHARS 37
+
 pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 
 list *get_paths(char *filename)
@@ -1347,4 +1349,3 @@ data *split_data(data d, int part, int total)
     split[1] = test;
     return split;
 }
-
diff --git a/src/deconvolutional_kernels.cu b/src/deconvolutional_kernels.cu
index 88106ece..b0ba1a86 100644
--- a/src/deconvolutional_kernels.cu
+++ b/src/deconvolutional_kernels.cu
@@ -59,7 +59,7 @@ extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer,
         float *b = layer.col_image_gpu;
         float *c = layer.weight_updates_gpu;
 
-        im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w, 
+        im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w,
                 layer.size, layer.stride, 0, b);
         gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
 
@@ -104,4 +104,3 @@ extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, in
     axpy_ongpu(size, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
     scal_ongpu(size, momentum, layer.weight_updates_gpu, 1);
 }
-
diff --git a/src/deconvolutional_layer.c b/src/deconvolutional_layer.c
index a109999f..a138fb53 100644
--- a/src/deconvolutional_layer.c
+++ b/src/deconvolutional_layer.c
@@ -173,7 +173,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
         float *b = l.col_image;
         float *c = l.weight_updates;
 
-        im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w, 
+        im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
                 l.size, l.stride, 0, b);
         gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
 
@@ -201,6 +201,3 @@ void update_deconvolutional_layer(deconvolutional_layer l, int skip, float learn
     axpy_cpu(size, learning_rate, l.weight_updates, 1, l.weights, 1);
     scal_cpu(size, momentum, l.weight_updates, 1);
 }
-
-
-
diff --git a/src/detection_layer.c b/src/detection_layer.c
index 4cfe591d..1379145b 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -312,4 +312,4 @@ void get_detection_detections(layer l, int w, int h, float thresh, detection *de
 			}
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/src/detector.c b/src/detector.c
index 96921689..98efaeff 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -8,6 +8,14 @@
 #include "demo.h"
 #include "option_list.h"
 
+#ifndef __COMPAR_FN_T
+#define __COMPAR_FN_T
+typedef int (*__compar_fn_t)(const void*, const void*);
+#ifdef __USE_GNU
+typedef __compar_fn_t comparison_fn_t;
+#endif
+#endif
+
 #ifdef OPENCV
 #include <opencv2/highgui/highgui_c.h>
 #include <opencv2/core/core_c.h>
diff --git a/src/dice.c b/src/dice.c
index 348a4aed..94155271 100644
--- a/src/dice.c
+++ b/src/dice.c
@@ -115,4 +115,3 @@ void run_dice(int argc, char **argv)
     else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights);
     else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights);
 }
-
diff --git a/src/dropout_layer.c b/src/dropout_layer.c
index 599acc8f..9eb22982 100644
--- a/src/dropout_layer.c
+++ b/src/dropout_layer.c
@@ -23,7 +23,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
     #endif
     fprintf(stderr, "dropout       p = %.2f               %4d  ->  %4d\n", probability, inputs, inputs);
     return l;
-} 
+}
 
 void resize_dropout_layer(dropout_layer *l, int inputs)
 {
@@ -57,4 +57,3 @@ void backward_dropout_layer(dropout_layer l, network_state state)
         else state.delta[i] *= l.scale;
     }
 }
-
diff --git a/src/gemm.c b/src/gemm.c
index 4b83c2f6..2fc9af02 100644
--- a/src/gemm.c
+++ b/src/gemm.c
@@ -15,6 +15,10 @@
 #include <omp.h>
 #endif
 
+#define TILE_M 4 // 4 ops
+#define TILE_N 16 // AVX2 = 2 ops * 8 floats
+#define TILE_K 16 // loop
+
 void gemm_bin(int M, int N, int K, float ALPHA,
         char  *A, int lda,
         float *B, int ldb,
@@ -1160,10 +1164,10 @@ static inline void xnor_avx2_popcnt(__m256i a_bit256, __m256i b_bit256, __m256i
     __m256i xor256 = _mm256_xor_si256(a_bit256, b_bit256);  // xnor = not(xor(a,b))
     c_bit256 = _mm256_andnot_si256(xor256, c_bit256);  // can be optimized - we can do other NOT for wegihts once and do not do this NOT
 
-    *count_sum = _mm256_add_epi64(count256(c_bit256), *count_sum);    //  1st part - popcnt Mula�s algorithm
+    *count_sum = _mm256_add_epi64(count256(c_bit256), *count_sum);    //  1st part - popcnt Mula's algorithm
 }
 
-// 2nd part - popcnt Mula�s algorithm
+// 2nd part - popcnt Mula's algorithm
 static inline int get_count_mula(__m256i count_sum) {
     return _mm256_extract_epi64(count_sum, 0)
         + _mm256_extract_epi64(count_sum, 1)
@@ -2827,4 +2831,3 @@ int test_gpu_blas()
     return 0;
 }
 #endif
-
diff --git a/src/gettimeofday.c b/src/gettimeofday.c
index 13fc9f74..b47d3cd7 100644
--- a/src/gettimeofday.c
+++ b/src/gettimeofday.c
@@ -8,7 +8,7 @@ LIB_API int gettimeofday(struct timeval* tp, struct timezone* tzp)
   FILETIME file_time;
   uint64_t time;
 
- 
+
   GetSystemTime(&system_time);
   SystemTimeToFileTime(&system_time, &file_time);
   time = ((uint64_t)file_time.dwLowDateTime);
@@ -18,7 +18,7 @@ LIB_API int gettimeofday(struct timeval* tp, struct timezone* tzp)
   tp->tv_usec = (long)(system_time.wMilliseconds * 1000);
   return 0;
   }
- 
+
 LIB_API int clock_gettime(int dummy, struct timespec* ct)
   {
   LARGE_INTEGER count;
@@ -30,7 +30,7 @@ LIB_API int clock_gettime(int dummy, struct timespec* ct)
       g_counts_per_sec.QuadPart = 0;
     }
   }
- 
+
   if ((NULL == ct) || (g_counts_per_sec.QuadPart <= 0) || (0 == QueryPerformanceCounter(&count))) {
     return -1;
 }
diff --git a/src/gettimeofday.h b/src/gettimeofday.h
index 17d7a0f6..f92c841d 100644
--- a/src/gettimeofday.h
+++ b/src/gettimeofday.h
@@ -24,7 +24,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
- 
+
 static unsigned char g_first_time = 1;
 static LARGE_INTEGER g_counts_per_sec;
 
@@ -36,4 +36,3 @@ LIB_API int clock_gettime(int, struct timespec*);
 #endif
 
 #endif
-
diff --git a/src/http_stream.h b/src/http_stream.h
index d96d7d4b..1e9a3cf1 100644
--- a/src/http_stream.h
+++ b/src/http_stream.h
@@ -36,4 +36,4 @@ image load_image_resize(char *filename, int w, int h, int c, image *im);
 }
 #endif
 
-#endif // HTTP_STREAM_H
\ No newline at end of file
+#endif // HTTP_STREAM_H
diff --git a/src/im2col.c b/src/im2col.c
index 69ec98a9..40b5251d 100644
--- a/src/im2col.c
+++ b/src/im2col.c
@@ -15,7 +15,7 @@ float im2col_get_pixel(float *im, int height, int width, int channels,
 //https://github.com/BVLC/caffe/blob/master/LICENSE
 void im2col_cpu(float* data_im,
      int channels,  int height,  int width,
-     int ksize,  int stride, int pad, float* data_col) 
+     int ksize,  int stride, int pad, float* data_col)
 {
     int c,h,w;
     int height_col = (height + 2*pad - ksize) / stride + 1;
@@ -37,4 +37,3 @@ void im2col_cpu(float* data_im,
         }
     }
 }
-
diff --git a/src/im2col_kernels.cu b/src/im2col_kernels.cu
index 05a89c1b..90af12b6 100644
--- a/src/im2col_kernels.cu
+++ b/src/im2col_kernels.cu
@@ -2214,4 +2214,3 @@ void convolve_bin_gpu(float *input, float *weights, float *output, int in_w, int
 }
 
 // --------------------------------
-
diff --git a/src/image.c b/src/image.c
index 31a1819d..c3a24d37 100644
--- a/src/image.c
+++ b/src/image.c
@@ -2159,4 +2159,4 @@ LIB_API void copy_image_from_bytes(image im, char *pdata)
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/list.c b/src/list.c
index 79fee08d..6ba09302 100644
--- a/src/list.c
+++ b/src/list.c
@@ -34,7 +34,7 @@ void *list_pop(list *l){
     if(l->back) l->back->next = 0;
     free(b);
     --l->size;
-    
+
     return val;
 }
 
diff --git a/src/local_layer.c b/src/local_layer.c
index f304511f..9c68e9d8 100644
--- a/src/local_layer.c
+++ b/src/local_layer.c
@@ -60,7 +60,7 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
     l.col_image = (float*)calloc(out_h * out_w * size * size * c, sizeof(float));
     l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
     l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
-    
+
     l.forward = forward_local_layer;
     l.backward = backward_local_layer;
     l.update = update_local_layer;
@@ -101,7 +101,7 @@ void forward_local_layer(const local_layer l, network_state state)
 
     for(i = 0; i < l.batch; ++i){
         float *input = state.input + i*l.w*l.h*l.c;
-        im2col_cpu(input, l.c, l.h, l.w, 
+        im2col_cpu(input, l.c, l.h, l.w,
                 l.size, l.stride, l.pad, l.col_image);
         float *output = l.output + i*l.outputs;
         for(j = 0; j < locations; ++j){
@@ -132,10 +132,10 @@ void backward_local_layer(local_layer l, network_state state)
 
     for(i = 0; i < l.batch; ++i){
         float *input = state.input + i*l.w*l.h*l.c;
-        im2col_cpu(input, l.c, l.h, l.w, 
+        im2col_cpu(input, l.c, l.h, l.w,
                 l.size, l.stride, l.pad, l.col_image);
 
-        for(j = 0; j < locations; ++j){ 
+        for(j = 0; j < locations; ++j){
             float *a = l.delta + i*l.outputs + j;
             float *b = l.col_image + j;
             float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
@@ -147,7 +147,7 @@ void backward_local_layer(local_layer l, network_state state)
         }
 
         if(state.delta){
-            for(j = 0; j < locations; ++j){ 
+            for(j = 0; j < locations; ++j){
                 float *a = l.weights + j*l.size*l.size*l.c*l.n;
                 float *b = l.delta + i*l.outputs + j;
                 float *c = l.col_image + j;
@@ -191,7 +191,7 @@ void forward_local_layer_gpu(const local_layer l, network_state state)
 
     for(i = 0; i < l.batch; ++i){
         float *input = state.input + i*l.w*l.h*l.c;
-        im2col_ongpu(input, l.c, l.h, l.w, 
+        im2col_ongpu(input, l.c, l.h, l.w,
                 l.size, l.stride, l.pad, l.col_image_gpu);
         float *output = l.output_gpu + i*l.outputs;
         for(j = 0; j < locations; ++j){
@@ -221,10 +221,10 @@ void backward_local_layer_gpu(local_layer l, network_state state)
 
     for(i = 0; i < l.batch; ++i){
         float *input = state.input + i*l.w*l.h*l.c;
-        im2col_ongpu(input, l.c, l.h, l.w, 
+        im2col_ongpu(input, l.c, l.h, l.w,
                 l.size, l.stride, l.pad, l.col_image_gpu);
 
-        for(j = 0; j < locations; ++j){ 
+        for(j = 0; j < locations; ++j){
             float *a = l.delta_gpu + i*l.outputs + j;
             float *b = l.col_image_gpu + j;
             float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n;
@@ -236,7 +236,7 @@ void backward_local_layer_gpu(local_layer l, network_state state)
         }
 
         if(state.delta){
-            for(j = 0; j < locations; ++j){ 
+            for(j = 0; j < locations; ++j){
                 float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
                 float *b = l.delta_gpu + i*l.outputs + j;
                 float *c = l.col_image_gpu + j;
diff --git a/src/lstm_layer.h b/src/lstm_layer.h
index d951ca37..ae544148 100644
--- a/src/lstm_layer.h
+++ b/src/lstm_layer.h
@@ -11,13 +11,13 @@ extern "C" {
 #endif
 LIB_API layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
 
-void forward_lstm_layer(layer l, network_state state); 
+void forward_lstm_layer(layer l, network_state state);
 void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay);
 
 #ifdef GPU
 void forward_lstm_layer_gpu(layer l, network_state state);
 void backward_lstm_layer_gpu(layer l, network_state state);
-void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); 
+void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
 #endif
 
 #ifdef __cplusplus
diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c
index 7d3f7974..6c2ebca2 100644
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@@ -162,4 +162,3 @@ void backward_maxpool_layer(const maxpool_layer l, network_state state)
         state.delta[index] += l.delta[i];
     }
 }
-
diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu
index ec207a28..29aa257c 100644
--- a/src/maxpool_layer_kernels.cu
+++ b/src/maxpool_layer_kernels.cu
@@ -126,4 +126,3 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st
     backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
-
diff --git a/src/network.h b/src/network.h
index d082e7fc..002d596c 100644
--- a/src/network.h
+++ b/src/network.h
@@ -167,4 +167,3 @@ network combine_train_valid_networks(network net_train, network net_map);
 #endif
 
 #endif
-
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index 38c15387..2c016697 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -459,4 +459,3 @@ float *network_predict_gpu(network net, float *input)
     //cuda_free(state.input);   // will be freed in the free_network()
     return out;
 }
-
diff --git a/src/nightmare.c b/src/nightmare.c
index 433c93b7..2d4c7466 100644
--- a/src/nightmare.c
+++ b/src/nightmare.c
@@ -23,7 +23,7 @@ float abs_mean(float *x, int n)
 void calculate_loss(float *output, float *delta, int n, float thresh)
 {
     int i;
-    float mean = mean_array(output, n); 
+    float mean = mean_array(output, n);
     float var = variance_array(output, n);
     for(i = 0; i < n; ++i){
         if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i];
@@ -260,7 +260,7 @@ void run_nightmare(int argc, char **argv)
     for(e = 0; e < rounds; ++e){
         fprintf(stderr, "Iteration: ");
         fflush(stderr);
-        for(n = 0; n < iters; ++n){  
+        for(n = 0; n < iters; ++n){
             fprintf(stderr, "%d, ", n);
             fflush(stderr);
             if(reconstruct){
@@ -305,4 +305,3 @@ void run_nightmare(int argc, char **argv)
         im = resized;
     }
 }
-
diff --git a/src/normalization_layer.c b/src/normalization_layer.c
index 9c01f789..9d4afcbb 100644
--- a/src/normalization_layer.c
+++ b/src/normalization_layer.c
@@ -52,9 +52,9 @@ void resize_normalization_layer(layer *layer, int w, int h)
     layer->norms = (float*)realloc(layer->norms, h * w * c * batch * sizeof(float));
 #ifdef GPU
     cuda_free(layer->output_gpu);
-    cuda_free(layer->delta_gpu); 
-    cuda_free(layer->squared_gpu); 
-    cuda_free(layer->norms_gpu);   
+    cuda_free(layer->delta_gpu);
+    cuda_free(layer->squared_gpu);
+    cuda_free(layer->norms_gpu);
     layer->output_gpu =  cuda_make_array(layer->output, h * w * c * batch);
     layer->delta_gpu =   cuda_make_array(layer->delta, h * w * c * batch);
     layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);
diff --git a/src/region_layer.c b/src/region_layer.c
index f7f82088..af61a043 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -9,6 +9,7 @@
 #include <string.h>
 #include <stdlib.h>
 
+#define DOABS 1
 
 region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes)
 {
@@ -588,4 +589,4 @@ void zero_objectness(layer l)
             l.output[obj_index] = 0;
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/rnn_vid.c b/src/rnn_vid.c
index c0fd3d36..7613ab25 100644
--- a/src/rnn_vid.c
+++ b/src/rnn_vid.c
@@ -213,4 +213,3 @@ void run_vid_rnn(int argc, char **argv)
 #else
 void run_vid_rnn(int argc, char **argv){}
 #endif
-
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index 3efe0157..3cd607be 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -47,9 +47,9 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups)
     l.forward_gpu = forward_softmax_layer_gpu;
     l.backward_gpu = backward_softmax_layer_gpu;
 
-    l.output_gpu = cuda_make_array(l.output, inputs*batch); 
-    l.loss_gpu = cuda_make_array(l.loss, inputs*batch); 
-    l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 
+    l.output_gpu = cuda_make_array(l.output, inputs*batch);
+    l.loss_gpu = cuda_make_array(l.loss, inputs*batch);
+    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
     #endif
     return l;
 }
diff --git a/src/stb_image_write.h b/src/stb_image_write.h
index 9d553e0d..274b1d81 100644
--- a/src/stb_image_write.h
+++ b/src/stb_image_write.h
@@ -81,7 +81,7 @@ USAGE:
 
    TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed
    data, set the global variable 'stbi_write_tga_with_rle' to 0.
-   
+
    JPEG does ignore alpha channels in input data; quality is between 1 and 100.
    Higher quality looks better but results in a bigger image.
    JPEG baseline (no JPEG progressive).
@@ -114,7 +114,7 @@ CREDITS:
       Thatcher Ulrich
       github:poppolopoppo
       Patrick Boettcher
-      
+
 LICENSE
 
   See end of file for license information.
@@ -1250,7 +1250,7 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, in
                              37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99};
    static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99,
                               99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99};
-   static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 
+   static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,
                                  1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f };
 
    int row, col, i, k;
@@ -1421,38 +1421,38 @@ This software is available under 2 licenses -- choose whichever you prefer.
 ------------------------------------------------------------------------------
 ALTERNATIVE A - MIT License
 Copyright (c) 2017 Sean Barrett
-Permission is hereby granted, free of charge, to any person obtaining a copy of 
-this software and associated documentation files (the "Software"), to deal in 
-the Software without restriction, including without limitation the rights to 
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
-of the Software, and to permit persons to whom the Software is furnished to do 
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
 so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all 
+The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 ------------------------------------------------------------------------------
 ALTERNATIVE B - Public Domain (www.unlicense.org)
 This is free and unencumbered software released into the public domain.
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 
-software, either in source code form or as a compiled binary, for any purpose, 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
 commercial or non-commercial, and by any means.
-In jurisdictions that recognize copyright laws, the author or authors of this 
-software dedicate any and all copyright interest in the software to the public 
-domain. We make this dedication for the benefit of the public at large and to 
-the detriment of our heirs and successors. We intend this dedication to be an 
-overt act of relinquishment in perpetuity of all present and future rights to 
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
 this software under copyright law.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ------------------------------------------------------------------------------
 */
diff --git a/src/tag.c b/src/tag.c
index f6cbc0fb..4033216e 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -150,4 +150,3 @@ void run_tag(int argc, char **argv)
     if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear);
     else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename);
 }
-
diff --git a/src/tree.c b/src/tree.c
index 0b320e3b..4383d693 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -40,7 +40,7 @@ void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leave
     for(j = 0; j < n; ++j){
         int parent = hier->parent[j];
         if(parent >= 0){
-            predictions[j] *= predictions[parent]; 
+            predictions[j] *= predictions[parent];
         }
     }
     if(only_leaves){
@@ -86,7 +86,7 @@ tree *read_tree(char *filename)
 {
     tree t = {0};
     FILE *fp = fopen(filename, "r");
-    
+
     char *line;
     int last_parent = -1;
     int group_size = 0;
diff --git a/src/upsample_layer.c b/src/upsample_layer.c
index dafc1b8d..d31dd49e 100644
--- a/src/upsample_layer.c
+++ b/src/upsample_layer.c
@@ -62,7 +62,7 @@ void resize_upsample_layer(layer *l, int w, int h)
     l->output_gpu  = cuda_make_array(l->output, l->outputs*l->batch);
     l->delta_gpu   = cuda_make_array(l->delta,  l->outputs*l->batch);
 #endif
-    
+
 }
 
 void forward_upsample_layer(const layer l, network_state net)
diff --git a/src/writing.c b/src/writing.c
index b6c49075..4acda60a 100644
--- a/src/writing.c
+++ b/src/writing.c
@@ -147,4 +147,3 @@ void run_writing(int argc, char **argv)
     if(0==strcmp(argv[2], "train")) train_writing(cfg, weights);
     else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename);
 }
-
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index 916391d1..134c9c90 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -485,4 +485,3 @@ void backward_yolo_layer_gpu(const layer l, network_state state)
     axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1);
 }
 #endif
-