Improve compatibility with C++ compilers, prepare for CMake

2023-08-10 21:13:14 +03:00 · 2019-02-14 17:28:23 +01:00
parent 3d9c8530a0
commit b3579380dc
128 changed files with 1871 additions and 2258 deletions
--- a/.gitignore
+++ b/.gitignore
@ -28,3 +28,8 @@ Thumbs.db
 # CMake #
 cmake-build-debug/
 CMakeLists.txt
 build_*/
 build.*
 cmake/
 *.patch
 .gitignore
--- a/include/darknet.h
+++ b/include/darknet.h
@ -26,6 +26,38 @@
 #endif
 #endif
 #ifdef _WIN32
 #define PORT unsigned long
 #define ADDRPOINTER int*
 #else
 #define PORT unsigned short
 #define SOCKET int
 #define HOSTENT struct hostent
 #define SOCKADDR struct sockaddr
 #define SOCKADDR_IN struct sockaddr_in
 #define ADDRPOINTER unsigned int*
 #define INVALID_SOCKET -1
 #define SOCKET_ERROR -1
 #endif
 #define FULL_MASK 0xffffffff
 #define WARP_SIZE 32
 #define BLOCK 512
 #define NUMCHARS 37
 #define NFRAMES 3
 #define BLOCK_TRANSPOSE32 256
 #define DOABS 1
 #define SECRET_NUM -1234
 #define C_SHARP_MAX_OBJECTS 1000
 #define TILE_M 4 // 4 ops
 #define TILE_N 16 // AVX2 = 2 ops * 8 floats
 #define TILE_K 16 // loop
 #ifndef __COMPAR_FN_T
 #define __COMPAR_FN_T
 typedef int (*__compar_fn_t)(const void*, const void*);
 #ifdef __USE_GNU
 typedef __compar_fn_t comparison_fn_t;
 #endif
 #endif
 #ifdef GPU
 #define BLOCK 512
--- a/include/yolo_v2_class.hpp
+++ b/include/yolo_v2_class.hpp
@ -1,17 +1,7 @@
-#pragma once
+#ifndef YOLO_V2_CLASS_HPP
-#ifdef LIB_EXPORTS
+#define YOLO_V2_CLASS_HPP
-#if defined(_MSC_VER)
+
-#define LIB_API __declspec(dllexport)
+#include "darknet.h"
 #else
 #define LIB_API __attribute__((visibility("default")))
 #endif
 #else
 #if defined(_MSC_VER)
 #define LIB_API
 #else
 #define LIB_API
 #endif
 #endif
 struct bbox_t {
    unsigned int x, y, w, h;    // (x,y) - top-left corner, (w, h) - width & height of bounded box
@ -28,7 +18,6 @@ struct image_t {
    float *data;                // pointer to the image data
 };
 #define C_SHARP_MAX_OBJECTS 1000
 struct bbox_t_container {
    bbox_t candidates[C_SHARP_MAX_OBJECTS];
 };
@ -41,8 +30,8 @@ struct bbox_t_container {
 #ifdef OPENCV
 #include <opencv2/opencv.hpp>            // C++
-#include "opencv2/highgui/highgui_c.h"    // C
+#include <opencv2/highgui/highgui_c.h>   // C
-#include "opencv2/imgproc/imgproc_c.h"    // C
+#include <opencv2/imgproc/imgproc_c.h>   // C
 #endif    // OPENCV
 extern "C" LIB_API int init(const char *configurationFilename, const char *weightsFilename, int gpu);
@ -658,3 +647,4 @@ void free_img(image_t m) {
 #endif    // __cplusplus
 */
 #endif
--- a/src/activation_kernels.cu
+++ b/src/activation_kernels.cu
@ -3,10 +3,8 @@
 #include "curand.h"
 #include "cublas_v2.h"
 extern "C" {
 #include "activations.h"
 #include "cuda.h"
 }
 __device__ float lhtan_activate_kernel(float x)
--- a/src/activation_layer.c
+++ b/src/activation_layer.c
@ -11,15 +11,15 @@
 layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
 {
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
    l.type = ACTIVE;
    l.inputs = inputs;
    l.outputs = inputs;
    l.batch=batch;
-    l.output = calloc(batch*inputs, sizeof(float*));
+    l.output = (float*)calloc(batch * inputs, sizeof(float));
-    l.delta = calloc(batch*inputs, sizeof(float*));
+    l.delta = (float*)calloc(batch * inputs, sizeof(float));
    l.forward = forward_activation_layer;
    l.backward = backward_activation_layer;
--- a/src/activation_layer.h
+++ b/src/activation_layer.h
@ -5,6 +5,9 @@
 #include "layer.h"
 #include "network.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
 void forward_activation_layer(layer l, network_state state);
@ -15,5 +18,8 @@ void forward_activation_layer_gpu(layer l, network_state state);
 void backward_activation_layer_gpu(layer l, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/activations.h
+++ b/src/activations.h
@ -8,6 +8,9 @@
 //    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
 //}ACTIVATION;
 #ifdef __cplusplus
 extern "C" {
 #endif
 ACTIVATION get_activation(char *s);
 char *get_activation_string(ACTIVATION a);
@ -87,5 +90,8 @@ static inline float leaky_gradient(float x){return (x>0) ? 1 : .1f;}
 static inline float tanh_gradient(float x){return 1-x*x;}
 static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01f : .125f;}
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/art.c
+++ b/src/art.c
@ -6,7 +6,6 @@
 #include "classifier.h"
 #ifdef WIN32
 #include <time.h>
 #include <winsock.h>
 #include "gettimeofday.h"
 #else
 #include <sys/time.h>
@ -14,10 +13,10 @@
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
-#include "opencv2/core/version.hpp"
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
 image get_image_from_stream(CvCapture *cap);
 #endif
--- a/src/avgpool_layer.c
+++ b/src/avgpool_layer.c
@ -5,7 +5,7 @@
 avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
 {
    fprintf(stderr, "avg                     %4d x%4d x%4d   ->  %4d\n",  w, h, c, c);
-    avgpool_layer l = {0};
+    avgpool_layer l = { (LAYER_TYPE)0 };
    l.type = AVGPOOL;
    l.batch = batch;
    l.h = h;
@ -17,8 +17,8 @@ avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
    l.outputs = l.out_c;
    l.inputs = h*w*c;
    int output_size = l.outputs * batch;
-    l.output =  calloc(output_size, sizeof(float));
+    l.output = (float*)calloc(output_size, sizeof(float));
-    l.delta =   calloc(output_size, sizeof(float));
+    l.delta = (float*)calloc(output_size, sizeof(float));
    l.forward = forward_avgpool_layer;
    l.backward = backward_avgpool_layer;
    #ifdef GPU
--- a/src/avgpool_layer.h
+++ b/src/avgpool_layer.h
@ -8,6 +8,9 @@
 typedef layer avgpool_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 image get_avgpool_image(avgpool_layer l);
 avgpool_layer make_avgpool_layer(int batch, int w, int h, int c);
 void resize_avgpool_layer(avgpool_layer *l, int w, int h);
@ -19,5 +22,8 @@ void forward_avgpool_layer_gpu(avgpool_layer l, network_state state);
 void backward_avgpool_layer_gpu(avgpool_layer l, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/avgpool_layer_kernels.cu
+++ b/src/avgpool_layer_kernels.cu
@ -2,10 +2,8 @@
 #include "curand.h"
 #include "cublas_v2.h"
 extern "C" {
 #include "avgpool_layer.h"
 #include "cuda.h"
 }
 __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
 {
--- a/src/batchnorm_layer.c
+++ b/src/batchnorm_layer.c
@ -5,29 +5,29 @@
 layer make_batchnorm_layer(int batch, int w, int h, int c)
 {
    fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
-    layer layer = {0};
+    layer layer = { (LAYER_TYPE)0 };
    layer.type = BATCHNORM;
    layer.batch = batch;
    layer.h = layer.out_h = h;
    layer.w = layer.out_w = w;
    layer.c = layer.out_c = c;
-    layer.output = calloc(h * w * c * batch, sizeof(float));
+    layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
-    layer.delta  = calloc(h * w * c * batch, sizeof(float));
+    layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
    layer.inputs = w*h*c;
    layer.outputs = layer.inputs;
-    layer.scales = calloc(c, sizeof(float));
+    layer.scales = (float*)calloc(c, sizeof(float));
-    layer.scale_updates = calloc(c, sizeof(float));
+    layer.scale_updates = (float*)calloc(c, sizeof(float));
    int i;
    for(i = 0; i < c; ++i){
        layer.scales[i] = 1;
    }
-    layer.mean = calloc(c, sizeof(float));
+    layer.mean = (float*)calloc(c, sizeof(float));
-    layer.variance = calloc(c, sizeof(float));
+    layer.variance = (float*)calloc(c, sizeof(float));
-    layer.rolling_mean = calloc(c, sizeof(float));
+    layer.rolling_mean = (float*)calloc(c, sizeof(float));
-    layer.rolling_variance = calloc(c, sizeof(float));
+    layer.rolling_variance = (float*)calloc(c, sizeof(float));
    layer.forward = forward_batchnorm_layer;
    layer.backward = backward_batchnorm_layer;
--- a/src/batchnorm_layer.h
+++ b/src/batchnorm_layer.h
@ -5,6 +5,9 @@
 #include "layer.h"
 #include "network.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 layer make_batchnorm_layer(int batch, int w, int h, int c);
 void forward_batchnorm_layer(layer l, network_state state);
 void backward_batchnorm_layer(layer l, network_state state);
@ -16,4 +19,7 @@ void pull_batchnorm_layer(layer l);
 void push_batchnorm_layer(layer l);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/blas.c
+++ b/src/blas.c
@ -34,7 +34,7 @@ void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride,
 void flatten(float *x, int size, int layers, int batch, int forward)
 {
-    float *swap = calloc(size*layers*batch, sizeof(float));
+    float* swap = (float*)calloc(size * layers * batch, sizeof(float));
    int i,c,b;
    for(b = 0; b < batch; ++b){
        for(c = 0; c < layers; ++c){
--- a/src/blas.h
+++ b/src/blas.h
@ -1,5 +1,12 @@
 #ifndef BLAS_H
 #define BLAS_H
 #ifdef GPU
 #include "cuda.h"
 #include "tree.h"
 #endif
 #ifdef __cplusplus
 extern "C" {
 #endif
 void flatten(float *x, int size, int layers, int batch, int forward);
 void pm(int M, int N, float *A);
 float *random_matrix(int rows, int cols);
@ -41,8 +48,6 @@ void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, i
 void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
 #ifdef GPU
 #include "cuda.h"
 #include "tree.h"
 void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
 void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
@ -97,5 +102,8 @@ void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int for
 void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@ -3,12 +3,10 @@
 #include "cublas_v2.h"
 #include <assert.h>
 extern "C" {
 #include "blas.h"
 #include "cuda.h"
 #include "utils.h"
 #include "tree.h"
 }
 __global__ void scale_bias_kernel(float *output, float *biases, int n, int size)
 {
--- a/src/box.c
+++ b/src/box.c
@ -249,7 +249,7 @@ int nms_comparator(const void *pa, const void *pb)
 void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh)
 {
    int i, j, k;
-    sortable_bbox *s = calloc(total, sizeof(sortable_bbox));
+    sortable_bbox* s = (sortable_bbox*)calloc(total, sizeof(sortable_bbox));
    for(i = 0; i < total; ++i){
        s[i].index = i;       
--- a/src/box.h
+++ b/src/box.h
@ -27,6 +27,9 @@ typedef struct detection_with_class {
 	int best_class;
 } detection_with_class;
 #ifdef __cplusplus
 extern "C" {
 #endif
 box float_to_box(float *f);
 float box_iou(box a, box b);
 float box_rmse(box a, box b);
@ -42,4 +45,7 @@ box encode_box(box b, box anchor);
 // Return number of selected detections in *selected_detections_num
 detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/captcha.c
+++ b/src/captcha.c
@ -41,11 +41,11 @@ void train_captcha(char *cfgfile, char *weightfile)
    int i = *net.seen/imgs;
    int solved = 1;
    list *plist;
-    char **labels = get_labels("/data/captcha/reimgs.labels.list");
+    char** labels = get_labels("data/captcha/reimgs.labels.list");
    if (solved){
-        plist = get_paths("/data/captcha/reimgs.solved.list");
+        plist = get_paths("data/captcha/reimgs.solved.list");
    }else{
-        plist = get_paths("/data/captcha/reimgs.raw.list");
+        plist = get_paths("data/captcha/reimgs.raw.list");
    }
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
@ -89,7 +89,7 @@ void train_captcha(char *cfgfile, char *weightfile)
        free_data(train);
        if(i%100==0){
            char buff[256];
-            sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);
+            sprintf(buff, "imagenet_backup/%s_%d.weights", base, i);
            save_weights(net, buff);
        }
    }
@ -104,7 +104,7 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
    set_batch_network(&net, 1);
    srand(2222222);
    int i = 0;
-    char **names = get_labels("/data/captcha/reimgs.labels.list");
+    char** names = get_labels("data/captcha/reimgs.labels.list");
    char buff[256];
    char *input = buff;
    int indexes[26];
@ -137,12 +137,12 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
 void valid_captcha(char *cfgfile, char *weightfile, char *filename)
 {
-    char **labels = get_labels("/data/captcha/reimgs.labels.list");
+    char** labels = get_labels("data/captcha/reimgs.labels.list");
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
-    list *plist = get_paths("/data/captcha/reimgs.fg.list");
+    list* plist = get_paths("data/captcha/reimgs.fg.list");
    char **paths = (char **)list_to_array(plist);
    int N = plist->size;
    int outputs = net.outputs;
--- a/src/cifar.c
+++ b/src/cifar.c
@ -5,7 +5,7 @@
 #include "blas.h"
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 void train_cifar(char *cfgfile, char *weightfile)
@ -20,7 +20,7 @@ void train_cifar(char *cfgfile, char *weightfile)
    }
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
    int classes = 10;
    int N = 50000;
@ -68,7 +68,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile)
    }
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
    int classes = 10;
    int N = 50000;
--- a/src/classifier.c
+++ b/src/classifier.c
@ -8,20 +8,18 @@
 #include "cuda.h"
 #ifdef WIN32
 #include <time.h>
 #include <winsock.h>
 #include "gettimeofday.h"
 #else
 #include <sys/time.h>
 #endif
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
-#include "opencv2/core/version.hpp"
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
-image get_image_from_stream(CvCapture *cap);
+#include "image.h"
 image get_image_from_stream_cpp(CvCapture *cap);
 #include "http_stream.h"
 IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
@ -34,7 +32,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
 float *get_regression_values(char **labels, int n)
 {
-    float *v = calloc(n, sizeof(float));
+    float* v = (float*)calloc(n, sizeof(float));
    int i;
    for(i = 0; i < n; ++i){
        char *p = strchr(labels[i], ' ');
@ -52,7 +50,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
-    network *nets = calloc(ngpus, sizeof(network));
+    network* nets = (network*)calloc(ngpus, sizeof(network));
    srand(time(0));
    int seed = rand();
@ -431,7 +429,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
    float avg_acc = 0;
    float avg_topk = 0;
-    int *indexes = calloc(topk, sizeof(int));
+    int* indexes = (int*)calloc(topk, sizeof(int));
    for(i = 0; i < m; ++i){
        int class_id = -1;
@ -458,7 +456,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
        images[7] = crop_image(im, 0, 0, w, h);
        images[8] = crop_image(im, -shift, shift, w, h);
        images[9] = crop_image(im, shift, shift, w, h);
-        float *pred = calloc(classes, sizeof(float));
+        float* pred = (float*)calloc(classes, sizeof(float));
        for(j = 0; j < 10; ++j){
            float *p = network_predict(net, images[j].data);
            if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
@ -504,7 +502,7 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
    float avg_acc = 0;
    float avg_topk = 0;
-    int *indexes = calloc(topk, sizeof(int));
+    int* indexes = (int*)calloc(topk, sizeof(int));
    int size = net.w;
    for(i = 0; i < m; ++i){
@ -581,7 +579,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
    float avg_acc = 0;
    float avg_topk = 0;
-    int *indexes = calloc(topk, sizeof(int));
+    int* indexes = (int*)calloc(topk, sizeof(int));
    for(i = 0; i < m; ++i){
        int class_id = -1;
@ -651,7 +649,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
    float avg_acc = 0;
    float avg_topk = 0;
-    int *indexes = calloc(topk, sizeof(int));
+    int* indexes = (int*)calloc(topk, sizeof(int));
    for(i = 0; i < m; ++i){
        int class_id = -1;
@ -662,7 +660,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
                break;
            }
        }
-        float *pred = calloc(classes, sizeof(float));
+        float* pred = (float*)calloc(classes, sizeof(float));
        image im = load_image_color(paths[i], 0, 0);
        for(j = 0; j < nscales; ++j){
            image r = resize_min(im, scales[j]);
@ -707,7 +705,7 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena
    int i = 0;
    char **names = get_labels(name_list);
    clock_t time;
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
    char buff[256];
    char *input = buff;
    while(1){
@ -790,7 +788,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
    int i = 0;
    char **names = get_labels(name_list);
    clock_t time;
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
    char buff[256];
    char *input = buff;
    int size = net.w;
@ -973,7 +971,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
    char *name_list = option_find_str(options, "names", 0);
    char **names = get_labels(name_list);
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
    if(!cap) error("Couldn't connect to webcam.\n");
    //cvNamedWindow("Threat", CV_WINDOW_NORMAL);
@ -1051,11 +1049,13 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
        }
        top_predictions(net, top, indexes);
        char buff[256];
-        sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count);
+        sprintf(buff, "tmp/threat_%06d", count);
        //save_image(out, buff);
 #ifndef _WIN32
        printf("\033[2J");
        printf("\033[1;1H");
 #endif
        printf("\nFPS:%.0f\n",fps);
        for(i = 0; i < top; ++i){
@ -1111,7 +1111,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
    char *name_list = option_find_str(options, "names", 0);
    char **names = get_labels(name_list);
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL);
@ -1193,7 +1193,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
    char *name_list = option_find_str(options, "names", 0);
    char **names = get_labels(name_list);
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow("Classifier", CV_WINDOW_NORMAL);
@ -1214,8 +1214,10 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
        if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1);
        top_predictions(net, top, indexes);
 #ifndef _WIN32
        printf("\033[2J");
        printf("\033[1;1H");
 #endif
        printf("\nFPS:%.0f\n",fps);
        for(i = 0; i < top; ++i){
@ -1257,7 +1259,7 @@ void run_classifier(int argc, char **argv)
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
-        gpus = calloc(ngpus, sizeof(int));
+        gpus = (int*)calloc(ngpus, sizeof(int));
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            gpu_list = strchr(gpu_list, ',')+1;
--- a/src/classifier.h
+++ b/src/classifier.h
@ -1,2 +1,12 @@
 #ifndef CLASSIFIER_H
 #define CLASSIFIER_H
 #include "list.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 list *read_data_cfg(char *filename);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/coco.c
+++ b/src/coco.c
@ -9,7 +9,7 @@
 #include "demo.h"
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};
@ -22,7 +22,7 @@ void train_coco(char *cfgfile, char *weightfile)
    //char *train_images = "/home/pjreddie/data/coco/train.txt";
    char *train_images = "data/coco.trainval.txt";
    //char *train_images = "data/bags.train.list";
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
    srand(time(0));
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
@ -164,9 +164,9 @@ void validate_coco(char *cfgfile, char *weightfile)
    FILE *fp = fopen(buff, "w");
    fprintf(fp, "[\n");
-    box *boxes = calloc(side*side*l.n, sizeof(box));
+    box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
-    float **probs = calloc(side*side*l.n, sizeof(float *));
+    float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
-    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
+    for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
    int m = plist->size;
    int i=0;
@ -177,11 +177,11 @@ void validate_coco(char *cfgfile, char *weightfile)
    float iou_thresh = .5;
    int nthreads = 8;
-    image *val = calloc(nthreads, sizeof(image));
+    image* val = (image*)calloc(nthreads, sizeof(image));
-    image *val_resized = calloc(nthreads, sizeof(image));
+    image* val_resized = (image*)calloc(nthreads, sizeof(image));
-    image *buf = calloc(nthreads, sizeof(image));
+    image* buf = (image*)calloc(nthreads, sizeof(image));
-    image *buf_resized = calloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
-    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
    load_args args = {0};
    args.w = net.w;
@ -240,7 +240,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
    srand(time(0));
    char *base = "results/comp4_det_test_";
-    list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt");
+    list* plist = get_paths("data/voc/test/2007_test.txt");
    char **paths = (char **)list_to_array(plist);
    layer l = net.layers[net.n-1];
@ -248,15 +248,15 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
    int side = l.side;
    int j, k;
-    FILE **fps = calloc(classes, sizeof(FILE *));
+    FILE** fps = (FILE**)calloc(classes, sizeof(FILE*));
    for(j = 0; j < classes; ++j){
        char buff[1024];
        snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]);
        fps[j] = fopen(buff, "w");
    }
-    box *boxes = calloc(side*side*l.n, sizeof(box));
+    box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
-    float **probs = calloc(side*side*l.n, sizeof(float *));
+    float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
-    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
+    for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
    int m = plist->size;
    int i=0;
@ -328,9 +328,9 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
    char buff[256];
    char *input = buff;
    int j;
-    box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
+    box* boxes = (box*)calloc(l.side * l.side * l.n, sizeof(box));
-    float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
+    float** probs = (float**)calloc(l.side * l.side * l.n, sizeof(float*));
-    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
+    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
    while(1){
        if(filename){
            strncpy(input, filename, 256);
--- a/src/col2im.c
+++ b/src/col2im.c
@ -1,5 +1,6 @@
 #include <stdio.h>
 #include <math.h>
 #include "col2im.h"
 void col2im_add_pixel(float *im, int height, int width, int channels,
                        int row, int col, int channel, int pad, float val)
 {
--- a/src/col2im.h
+++ b/src/col2im.h
@ -1,6 +1,9 @@
 #ifndef COL2IM_H
 #define COL2IM_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 void col2im_cpu(float* data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float* data_im);
@ -10,4 +13,7 @@ void col2im_ongpu(float *data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float *data_im);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/col2im_kernels.cu
+++ b/src/col2im_kernels.cu
@ -2,10 +2,8 @@
 #include "curand.h"
 #include "cublas_v2.h"
 extern "C" {
 #include "col2im.h"
 #include "cuda.h"
 }
 // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
 // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE
--- a/src/compare.c
+++ b/src/compare.c
@ -12,7 +12,7 @@ void train_compare(char *cfgfile, char *weightfile)
    srand(time(0));
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
    printf("%s\n", base);
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
@ -176,7 +176,7 @@ int bbox_comparator(const void *a, const void *b)
    image im1 = load_image_color(box1.filename, net.w, net.h);
    image im2 = load_image_color(box2.filename, net.w, net.h);
-    float *X  = calloc(net.w*net.h*net.c, sizeof(float));
+    float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
    memcpy(X,                   im1.data, im1.w*im1.h*im1.c*sizeof(float));
    memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
    float *predictions = network_predict(net, X);
@ -205,7 +205,7 @@ void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, in
 {
    image im1 = load_image_color(a->filename, net.w, net.h);
    image im2 = load_image_color(b->filename, net.w, net.h);
-    float *X  = calloc(net.w*net.h*net.c, sizeof(float));
+    float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
    memcpy(X,                   im1.data, im1.w*im1.h*im1.c*sizeof(float));
    memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
    float *predictions = network_predict(net, X);
@ -239,7 +239,7 @@ void SortMaster3000(char *filename, char *weightfile)
    char **paths = (char **)list_to_array(plist);
    int N = plist->size;
    free_list(plist);
-    sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
+    sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
    printf("Sorting %d boxes...\n", N);
    for(i = 0; i < N; ++i){
        boxes[i].filename = paths[i];
@ -274,13 +274,13 @@ void BattleRoyaleWithCheese(char *filename, char *weightfile)
    int N = plist->size;
    int total = N;
    free_list(plist);
-    sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
+    sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
    printf("Battling %d boxes...\n", N);
    for(i = 0; i < N; ++i){
        boxes[i].filename = paths[i];
        boxes[i].net = net;
        boxes[i].classes = classes;
-        boxes[i].elos = calloc(classes, sizeof(float));;
+        boxes[i].elos = (float*)calloc(classes, sizeof(float));
        for(j = 0; j < classes; ++j){
            boxes[i].elos[j] = 1500;
        }
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@ -54,7 +54,7 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
 {
    int total_batch = batch*steps;
    int i;
-    connected_layer l = {0};
+    connected_layer l = { (LAYER_TYPE)0 };
    l.type = CONNECTED;
    l.inputs = inputs;
@ -74,14 +74,14 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
    l.activation = activation;
    l.learning_rate_scale = 1;
-    l.output = calloc(total_batch*outputs, sizeof(float));
+    l.output = (float*)calloc(total_batch * outputs, sizeof(float));
-    l.delta = calloc(total_batch*outputs, sizeof(float));
+    l.delta = (float*)calloc(total_batch * outputs, sizeof(float));
-    l.weight_updates = calloc(inputs*outputs, sizeof(float));
+    l.weight_updates = (float*)calloc(inputs * outputs, sizeof(float));
-    l.bias_updates = calloc(outputs, sizeof(float));
+    l.bias_updates = (float*)calloc(outputs, sizeof(float));
-    l.weights = calloc(outputs*inputs, sizeof(float));
+    l.weights = (float*)calloc(outputs * inputs, sizeof(float));
-    l.biases = calloc(outputs, sizeof(float));
+    l.biases = (float*)calloc(outputs, sizeof(float));
    l.forward = forward_connected_layer;
    l.backward = backward_connected_layer;
@ -98,22 +98,22 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
    }
    if(batch_normalize){
-        l.scales = calloc(outputs, sizeof(float));
+        l.scales = (float*)calloc(outputs, sizeof(float));
-        l.scale_updates = calloc(outputs, sizeof(float));
+        l.scale_updates = (float*)calloc(outputs, sizeof(float));
        for(i = 0; i < outputs; ++i){
            l.scales[i] = 1;
        }
-        l.mean = calloc(outputs, sizeof(float));
+        l.mean = (float*)calloc(outputs, sizeof(float));
-        l.mean_delta = calloc(outputs, sizeof(float));
+        l.mean_delta = (float*)calloc(outputs, sizeof(float));
-        l.variance = calloc(outputs, sizeof(float));
+        l.variance = (float*)calloc(outputs, sizeof(float));
-        l.variance_delta = calloc(outputs, sizeof(float));
+        l.variance_delta = (float*)calloc(outputs, sizeof(float));
-        l.rolling_mean = calloc(outputs, sizeof(float));
+        l.rolling_mean = (float*)calloc(outputs, sizeof(float));
-        l.rolling_variance = calloc(outputs, sizeof(float));
+        l.rolling_variance = (float*)calloc(outputs, sizeof(float));
-        l.x = calloc(total_batch*outputs, sizeof(float));
+        l.x = (float*)calloc(total_batch * outputs, sizeof(float));
-        l.x_norm = calloc(total_batch*outputs, sizeof(float));
+        l.x_norm = (float*)calloc(total_batch * outputs, sizeof(float));
    }
 #ifdef GPU
--- a/src/connected_layer.h
+++ b/src/connected_layer.h
@ -7,7 +7,11 @@
 typedef layer connected_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 connected_layer make_connected_layer(int batch, int steps, int inputs, int outputs, ACTIVATION activation, int batch_normalize);
 size_t get_connected_workspace_size(layer l);
 void forward_connected_layer(connected_layer layer, network_state state);
 void backward_connected_layer(connected_layer layer, network_state state);
@ -23,5 +27,8 @@ void push_connected_layer(connected_layer layer);
 void pull_connected_layer(connected_layer layer);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@ -3,10 +3,11 @@
 #include "cublas_v2.h"
 #ifdef CUDNN
 #ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "cudnn.lib")
 #endif
 #endif
 extern "C" {
 #include "convolutional_layer.h"
 #include "batchnorm_layer.h"
 #include "gemm.h"
@ -15,7 +16,7 @@ extern "C" {
 #include "col2im.h"
 #include "utils.h"
 #include "cuda.h"
-}
+
 __global__ void binarize_kernel(float *x, int n, float *binary)
 {
@ -73,7 +74,6 @@ void binarize_weights_gpu(float *weights, int n, int size, float *binary)
    CHECK_CUDA(cudaPeekAtLastError());
 }
 #define WARP_SIZE 32
 __global__ void set_zero_kernel(float *src, int size)
 {
@ -477,10 +477,10 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
                simple_copy_ongpu(l.outputs*l.batch / 2, output16, l.x_gpu);
                //copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1);
                //cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream());
-                float one = 1;
+                float one = 1.0f;
-                float zero = 0;
+                float zero = 0.0f;
                // Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth
-                // compared to FP32, it<EFBFBD>s just that the statistics and value adjustment should be done in FP32.
+                // compared to FP32, it's just that the statistics and value adjustment should be done in FP32.
                CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(cudnn_handle(),
                    CUDNN_BATCHNORM_SPATIAL,
                    &one,
@ -639,8 +639,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
            //    l.mean_gpu = l.rolling_mean_gpu;
            //    l.variance_gpu = l.rolling_variance_gpu;
            //}
-            float one = 1;
+            float one = 1.0f;
-            float zero = 0;
+            float zero = 0.0f;
            CHECK_CUDNN(cudnnBatchNormalizationBackward(cudnn_handle(),
                CUDNN_BATCHNORM_SPATIAL,
                &one,
@ -936,4 +936,3 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float
    }
 }
 */
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@ -9,8 +9,10 @@
 #include <time.h>
 #ifdef CUDNN
 #ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "cudnn.lib")
 #endif
 #endif
 #ifdef AI2
 #include "xnor_layer.h"
@ -288,7 +290,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
            l->weightDesc,
            l->convDesc,
            l->dstTensorDesc,
-            forward_algo,
+            (cudnnConvolutionFwdPreference_t)forward_algo,
            0,
            &l->fw_algo));
    CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
@ -296,7 +298,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
            l->ddstTensorDesc,
            l->convDesc,
            l->dsrcTensorDesc,
-            backward_algo,
+            (cudnnConvolutionBwdDataPreference_t)backward_algo,
            0,
            &l->bd_algo));
    CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
@ -304,7 +306,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
            l->ddstTensorDesc,
            l->convDesc,
            l->dweightDesc,
-            backward_filter,
+            (cudnnConvolutionBwdFilterPreference_t)backward_filter,
            0,
            &l->bf_algo));
@ -328,7 +330,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
 {
    int total_batch = batch*steps;
    int i;
-    convolutional_layer l = {0};
+    convolutional_layer l = { (LAYER_TYPE)0 };
    l.type = CONVOLUTIONAL;
    l.index = index;
@ -346,11 +348,11 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
    l.batch_normalize = batch_normalize;
    l.learning_rate_scale = 1;
-    l.weights = calloc(c*n*size*size, sizeof(float));
+    l.weights = (float*)calloc(c * n * size * size, sizeof(float));
-    l.weight_updates = calloc(c*n*size*size, sizeof(float));
+    l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
-    l.biases = calloc(n, sizeof(float));
+    l.biases = (float*)calloc(n, sizeof(float));
-    l.bias_updates = calloc(n, sizeof(float));
+    l.bias_updates = (float*)calloc(n, sizeof(float));
    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
@ -364,64 +366,64 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
    l.inputs = l.w * l.h * l.c;
    l.activation = activation;
-    l.output = calloc(total_batch*l.outputs, sizeof(float));
+    l.output = (float*)calloc(total_batch*l.outputs, sizeof(float));
-    l.delta  = calloc(total_batch*l.outputs, sizeof(float));
+    l.delta  = (float*)calloc(total_batch*l.outputs, sizeof(float));
    l.forward = forward_convolutional_layer;
    l.backward = backward_convolutional_layer;
    l.update = update_convolutional_layer;
    if(binary){
-        l.binary_weights = calloc(c*n*size*size, sizeof(float));
+        l.binary_weights = (float*)calloc(c * n * size * size, sizeof(float));
-        l.cweights = calloc(c*n*size*size, sizeof(char));
+        l.cweights = (char*)calloc(c * n * size * size, sizeof(char));
-        l.scales = calloc(n, sizeof(float));
+        l.scales = (float*)calloc(n, sizeof(float));
    }
    if(xnor){
-        l.binary_weights = calloc(c*n*size*size, sizeof(float));
+        l.binary_weights = (float*)calloc(c * n * size * size, sizeof(float));
-        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
+        l.binary_input = (float*)calloc(l.inputs * l.batch, sizeof(float));
        int align = 32;// 8;
        int src_align = l.out_h*l.out_w;
        l.bit_align = src_align + (align - src_align % align);
-        l.mean_arr = calloc(l.n, sizeof(float));
+        l.mean_arr = (float*)calloc(l.n, sizeof(float));
        const size_t new_c = l.c / 32;
        size_t in_re_packed_input_size = new_c * l.w * l.h + 1;
-        l.bin_re_packed_input = calloc(in_re_packed_input_size, sizeof(uint32_t));
+        l.bin_re_packed_input = (uint32_t*)calloc(in_re_packed_input_size, sizeof(uint32_t));
        l.lda_align = 256;  // AVX2
        int k = l.size*l.size*l.c;
        size_t k_aligned = k + (l.lda_align - k%l.lda_align);
        size_t t_bit_input_size = k_aligned * l.bit_align / 8;
-        l.t_bit_input = calloc(t_bit_input_size, sizeof(char));
+        l.t_bit_input = (char*)calloc(t_bit_input_size, sizeof(char));
    }
    if(batch_normalize){
-        l.scales = calloc(n, sizeof(float));
+        l.scales = (float*)calloc(n, sizeof(float));
-        l.scale_updates = calloc(n, sizeof(float));
+        l.scale_updates = (float*)calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }
-        l.mean = calloc(n, sizeof(float));
+        l.mean = (float*)calloc(n, sizeof(float));
-        l.variance = calloc(n, sizeof(float));
+        l.variance = (float*)calloc(n, sizeof(float));
-        l.mean_delta = calloc(n, sizeof(float));
+        l.mean_delta = (float*)calloc(n, sizeof(float));
-        l.variance_delta = calloc(n, sizeof(float));
+        l.variance_delta = (float*)calloc(n, sizeof(float));
-        l.rolling_mean = calloc(n, sizeof(float));
+        l.rolling_mean = (float*)calloc(n, sizeof(float));
-        l.rolling_variance = calloc(n, sizeof(float));
+        l.rolling_variance = (float*)calloc(n, sizeof(float));
-        l.x = calloc(total_batch*l.outputs, sizeof(float));
+        l.x = (float*)calloc(total_batch * l.outputs, sizeof(float));
-        l.x_norm = calloc(total_batch*l.outputs, sizeof(float));
+        l.x_norm = (float*)calloc(total_batch * l.outputs, sizeof(float));
    }
    if(adam){
        l.adam = 1;
-        l.m = calloc(c*n*size*size, sizeof(float));
+        l.m = (float*)calloc(c * n * size * size, sizeof(float));
-        l.v = calloc(c*n*size*size, sizeof(float));
+        l.v = (float*)calloc(c * n * size * size, sizeof(float));
-        l.bias_m = calloc(n, sizeof(float));
+        l.bias_m = (float*)calloc(n, sizeof(float));
-        l.scale_m = calloc(n, sizeof(float));
+        l.scale_m = (float*)calloc(n, sizeof(float));
-        l.bias_v = calloc(n, sizeof(float));
+        l.bias_v = (float*)calloc(n, sizeof(float));
-        l.scale_v = calloc(n, sizeof(float));
+        l.scale_v = (float*)calloc(n, sizeof(float));
    }
 #ifdef GPU
@ -549,11 +551,11 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
    l->outputs = l->out_h * l->out_w * l->out_c;
    l->inputs = l->w * l->h * l->c;
-    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
+    l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
-    l->delta  = realloc(l->delta,  l->batch*l->outputs*sizeof(float));
+    l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
    if(l->batch_normalize){
-        l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
+        l->x = (float*)realloc(l->x, l->batch * l->outputs * sizeof(float));
-        l->x_norm  = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
+        l->x_norm = (float*)realloc(l->x_norm, l->batch * l->outputs * sizeof(float));
    }
    if (l->xnor) {
@ -642,7 +644,7 @@ void gemm_nn_custom(int M, int N, int K, float ALPHA,
    int i, j, k;
    for (i = 0; i < M; ++i) {
        for (k = 0; k < K; ++k) {
-            register float A_PART = ALPHA*A[i*lda + k];
+            float A_PART = ALPHA * A[i * lda + k];
            //printf("\n weight = %f \n", A_PART);
            for (j = 0; j < N; ++j) {
                C[i*ldc + j] += A_PART*B[k*ldb + j];
@ -695,8 +697,8 @@ void binary_align_weights(convolutional_layer *l)
    size_t align_weights_size = new_lda * m;
    l->align_bit_weights_size = align_weights_size / 8 + 1;
-    float *align_weights = calloc(align_weights_size, sizeof(float));
+    float* align_weights = (float*)calloc(align_weights_size, sizeof(float));
-    l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char));
+    l->align_bit_weights = (char*)calloc(l->align_bit_weights_size, sizeof(char));
    size_t i, j;
    // align A without transpose
@ -739,7 +741,7 @@ void binary_align_weights(convolutional_layer *l)
        //printf("\n l.index = %d \t aw[0] = %f, aw[1] = %f, aw[2] = %f, aw[3] = %f \n", l->index, align_weights[0], align_weights[1], align_weights[2], align_weights[3]);
        //memcpy(l->binary_weights, align_weights, (l->size * l->size * l->c * l->n) * sizeof(float));
-        float_to_bit(align_weights, l->align_bit_weights, align_weights_size);
+        float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size);
        //if (l->n >= 32)
        if(gpu_index >= 0)
@ -757,7 +759,7 @@ void binary_align_weights(convolutional_layer *l)
        //get_mean_array(l->binary_weights, m*new_lda, l->n, l->mean_arr);
    }
    else {
-        float_to_bit(align_weights, l->align_bit_weights, align_weights_size);
+        float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size);
        get_mean_array(l->binary_weights, m*k, l->n, l->mean_arr);
    }
@ -808,7 +810,7 @@ size_t binary_transpose_align_input(int k, int n, float *b, char **t_bit_input,
    // t_bit_input - [new_ldb, n] - [k', n]
    //transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8);
-    transpose_bin(b, *t_bit_input, k, n, bit_align, new_ldb, 8);
+    transpose_bin((uint32_t*)b, (uint32_t*)*t_bit_input, k, n, bit_align, new_ldb, 8);
    return t_intput_size;
 }
@ -874,7 +876,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
                repack_input(state.input, state.workspace, l.w, l.h, l.c);
                // 32 x floats -> 1 x uint32_t
-                float_to_bit(state.workspace, (char *)l.bin_re_packed_input, l.c * l.w * l.h);
+                float_to_bit(state.workspace, (unsigned char *)l.bin_re_packed_input, l.c * l.w * l.h);
                //free(re_packed_input);
@ -900,10 +902,10 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
 // // then exit from if()
-                transpose_uint32((uint32_t *)state.workspace, l.t_bit_input, new_k, n, n, new_ldb);
+                transpose_uint32((uint32_t *)state.workspace, (uint32_t*)l.t_bit_input, new_k, n, n, new_ldb);
                // the main GEMM function
-                gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, l.t_bit_input, new_ldb, c, n, l.mean_arr);
+                gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr);
                // // alternative GEMM
                //gemm_nn_bin_transposed_32bit_packed(m, n, new_k, 1,
@ -945,7 +947,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
                    size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align);
                    // 5x times faster than gemm()-float32
-                    gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, l.t_bit_input, new_ldb, c, n, l.mean_arr);
+                    gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr);
                    //gemm_nn_custom_bin_mean_transposed(m, n, k, 1, bit_weights, k, t_bit_input, new_ldb, c, n, mean_arr);
@ -1074,7 +1076,7 @@ void rescale_weights(convolutional_layer l, float scale, float trans)
 image *get_weights(convolutional_layer l)
 {
-    image *weights = calloc(l.n, sizeof(image));
+    image* weights = (image*)calloc(l.n, sizeof(image));
    int i;
    for(i = 0; i < l.n; ++i){
        weights[i] = copy_image(get_convolutional_weight(l, i));
@ -1097,4 +1099,3 @@ image *visualize_convolutional_layer(convolutional_layer l, char *window, image
    free_image(dc);
    return single_weights;
 }
--- a/src/convolutional_layer.h
+++ b/src/convolutional_layer.h
@ -9,6 +9,9 @@
 typedef layer convolutional_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 #ifdef GPU
 void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
 void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
@ -22,11 +25,11 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
 #ifdef CUDNN
 void cudnn_convolutional_setup(layer *l, int cudnn_preference);
 void create_convolutional_cudnn_tensors(layer *l);
 size_t get_convolutional_workspace_size(layer l);
 void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
 #endif
 #endif
 size_t get_convolutional_workspace_size(layer l);
 convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index);
 void denormalize_convolutional_layer(convolutional_layer l);
 void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
@ -53,5 +56,8 @@ int convolutional_out_width(convolutional_layer layer);
 void rescale_weights(convolutional_layer l, float scale, float trans);
 void rgbgr_weights(convolutional_layer l);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/cost_layer.c
+++ b/src/cost_layer.c
@ -32,7 +32,7 @@ char *get_cost_string(COST_TYPE a)
 cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
 {
    fprintf(stderr, "cost                                           %4d\n",  inputs);
-    cost_layer l = {0};
+    cost_layer l = { (LAYER_TYPE)0 };
    l.type = COST;
    l.scale = scale;
@ -40,9 +40,9 @@ cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float sca
    l.inputs = inputs;
    l.outputs = inputs;
    l.cost_type = cost_type;
-    l.delta = calloc(inputs*batch, sizeof(float));
+    l.delta = (float*)calloc(inputs * batch, sizeof(float));
-    l.output = calloc(inputs*batch, sizeof(float));
+    l.output = (float*)calloc(inputs * batch, sizeof(float));
-    l.cost = calloc(1, sizeof(float));
+    l.cost = (float*)calloc(1, sizeof(float));
    l.forward = forward_cost_layer;
    l.backward = backward_cost_layer;
@ -60,8 +60,8 @@ void resize_cost_layer(cost_layer *l, int inputs)
 {
    l->inputs = inputs;
    l->outputs = inputs;
-    l->delta = realloc(l->delta, inputs*l->batch*sizeof(float));
+    l->delta = (float*)realloc(l->delta, inputs * l->batch * sizeof(float));
-    l->output = realloc(l->output, inputs*l->batch*sizeof(float));
+    l->output = (float*)realloc(l->output, inputs * l->batch * sizeof(float));
 #ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);
--- a/src/cost_layer.h
+++ b/src/cost_layer.h
@ -5,6 +5,9 @@
 typedef layer cost_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 COST_TYPE get_cost_type(char *s);
 char *get_cost_string(COST_TYPE a);
 cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
@ -17,4 +20,7 @@ void forward_cost_layer_gpu(cost_layer l, network_state state);
 void backward_cost_layer_gpu(const cost_layer l, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/cpu_gemm.c
+++ b/src/cpu_gemm.c
@ -9,7 +9,7 @@ void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
    int i,j,k;
    for(i = 0; i < M; ++i){
        for(k = 0; k < K; ++k){
-            register float A_PART = ALPHA*A[i*lda+k];
+            float A_PART = ALPHA * A[i * lda + k];
            for(j = 0; j < N; ++j){
                C[i*ldc+j] += A_PART*B[k*ldb+j];
            }
@ -26,7 +26,7 @@ void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
    int i,j,k;
    for(i = 0; i < M; ++i){
        for(j = 0; j < N; ++j){
-            register float sum = 0;
+            float sum = 0;
            for(k = 0; k < K; ++k){
                sum += ALPHA*A[i*lda+k]*B[k+j*ldb];
            }
@ -44,7 +44,7 @@ void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
    int i,j,k;
    for(i = 0; i < M; ++i){
        for(k = 0; k < K; ++k){
-            register float A_PART = ALPHA*A[k*lda+i];
+            float A_PART = ALPHA * A[k * lda + i];
            for(j = 0; j < N; ++j){
                C[i*ldc+j] += A_PART*B[k*ldb+j];
            }
--- a/src/crnn_layer.c
+++ b/src/crnn_layer.c
@ -30,7 +30,7 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
 {
    fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
    batch = batch / steps;
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
    l.batch = batch;
    l.type = CRNN;
    l.steps = steps;
@ -44,22 +44,19 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
    l.hidden = h * w * hidden_filters;
    l.outputs = l.out_h * l.out_w * l.out_c;
-    l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
+    l.state = (float*)calloc(l.hidden * batch * (steps + 1), sizeof(float));
-    l.input_layer = malloc(sizeof(layer));
+    l.input_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "");
    *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
    l.input_layer->batch = batch;
    if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
-    l.self_layer = malloc(sizeof(layer));
+    l.self_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "");
    *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
    l.self_layer->batch = batch;
    if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
-    l.output_layer = malloc(sizeof(layer));
+    l.output_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "");
    *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
    l.output_layer->batch = batch;
    if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
--- a/src/crnn_layer.h
+++ b/src/crnn_layer.h
@ -6,6 +6,9 @@
 #include "layer.h"
 #include "network.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
 void forward_crnn_layer(layer l, network_state state);
@ -20,5 +23,8 @@ void push_crnn_layer(layer l);
 void pull_crnn_layer(layer l);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/crop_layer.c
+++ b/src/crop_layer.c
@ -16,7 +16,7 @@ void backward_crop_layer_gpu(const crop_layer l, network_state state){}
 crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
 {
    fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
-    crop_layer l = {0};
+    crop_layer l = { (LAYER_TYPE)0 };
    l.type = CROP;
    l.batch = batch;
    l.h = h;
@ -32,7 +32,7 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int
    l.out_c = c;
    l.inputs = l.w * l.h * l.c;
    l.outputs = l.out_w * l.out_h * l.out_c;
-    l.output = calloc(l.outputs*batch, sizeof(float));
+    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
    l.forward = forward_crop_layer;
    l.backward = backward_crop_layer;
@ -56,7 +56,7 @@ void resize_crop_layer(layer *l, int w, int h)
    l->inputs = l->w * l->h * l->c;
    l->outputs = l->out_h * l->out_w * l->out_c;
-    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
+    l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
    #ifdef GPU
    cuda_free(l->output_gpu);
    l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
--- a/src/crop_layer.h
+++ b/src/crop_layer.h
@ -7,6 +7,9 @@
 typedef layer crop_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 image get_crop_image(crop_layer l);
 crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
 void forward_crop_layer(const crop_layer l, network_state state);
@ -16,5 +19,8 @@ void resize_crop_layer(layer *l, int w, int h);
 void forward_crop_layer_gpu(crop_layer l, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/crop_layer_kernels.cu
+++ b/src/crop_layer_kernels.cu
@ -2,12 +2,10 @@
 #include "curand.h"
 #include "cublas_v2.h"
 extern "C" {
 #include "crop_layer.h"
 #include "utils.h"
 #include "cuda.h"
 #include "image.h"
 }
 __device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
 {
--- a/src/cuda.c
+++ b/src/cuda.c
@ -1,4 +1,10 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 int gpu_index = 0;
 #ifdef __cplusplus
 }
 #endif // __cplusplus
 #ifdef GPU
@ -71,7 +77,7 @@ dim3 cuda_gridsize(size_t n){
        x = ceil(sqrt(k));
        y = (n-1)/(x*BLOCK) + 1;
    }
-    dim3 d = {x, y, 1};
+    dim3 d = { (unsigned int)x, (unsigned int)y, 1 };
    //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
    return d;
 }
@ -188,7 +194,7 @@ cublasHandle_t blas_handle()
    if(!init[i]) {
        cublasCreate(&handle[i]);
        cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream());
-        CHECK_CUDA(status);
+        CHECK_CUDA((cudaError_t)status);
        init[i] = 1;
    }
    return handle[i];
@ -226,7 +232,7 @@ void cuda_random(float *x_gpu, size_t n)
 float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
 {
-    float *tmp = calloc(n, sizeof(float));
+    float* tmp = (float*)calloc(n, sizeof(float));
    cuda_pull_array(x_gpu, tmp, n);
    //int i;
    //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
@ -310,6 +316,6 @@ int get_gpu_compute_capability(int i)
 }
 #else // GPU
-#include "cuda.h"
+#include "darknet.h"
 void cuda_set_device(int n) {}
 #endif // GPU
--- a/src/cuda.h
+++ b/src/cuda.h
@ -1,25 +1,27 @@
-#ifndef CUDA_H
+#ifndef DARKCUDA_H
-#define CUDA_H
+#define DARKCUDA_H
 #include "darknet.h"
-#if defined(_MSC_VER) && _MSC_VER < 1900
+#ifdef __cplusplus
-	#define inline __inline
+extern "C" {
 #endif
 extern int gpu_index;
 #ifdef __cplusplus
 }
 #endif // __cplusplus
 #ifdef GPU
 #define BLOCK 512
-#include "cuda_runtime.h"
+#include <cuda_runtime.h>
-#include "curand.h"
+#include <curand.h>
-#include "cublas_v2.h"
+#include <cublas_v2.h>
-#include "cuda_runtime_api.h"
+#include <cuda_runtime_api.h>
-//#include "driver_types.h"
+#include <driver_types.h>
 #ifdef CUDNN
-#include "cudnn.h"
+#include <cudnn.h>
 #endif // CUDNN
 #ifndef __DATE__
@ -65,9 +67,6 @@ extern "C" {
    cudaStream_t get_cuda_memcpy_stream();
    int get_number_of_blocks(int array_size, int block_size);
    int get_gpu_compute_capability(int i);
 #ifdef __cplusplus
 }
 #endif // __cplusplus
 #ifdef CUDNN
 cudnnHandle_t cudnn_handle();
@ -77,6 +76,10 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line
 #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__,  __DATE__ " - " __TIME__ );
 #endif
 #ifdef __cplusplus
 }
 #endif // __cplusplus
 #else // GPU
 //LIB_API void cuda_set_device(int n);
 #endif // GPU
--- a/src/darknet.c
+++ b/src/darknet.c
@ -10,7 +10,7 @@
 #include "connected_layer.h"
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
@ -258,12 +258,12 @@ layer normalize_layer(layer l, int n)
 {
    int j;
    l.batch_normalize=1;
-    l.scales = calloc(n, sizeof(float));
+    l.scales = (float*)calloc(n, sizeof(float));
    for(j = 0; j < n; ++j){
        l.scales[j] = 1;
    }
-    l.rolling_mean = calloc(n, sizeof(float));
+    l.rolling_mean = (float*)calloc(n, sizeof(float));
-    l.rolling_variance = calloc(n, sizeof(float));
+    l.rolling_variance = (float*)calloc(n, sizeof(float));
    return l;
 }
--- a/src/darkunistd.h
+++ b/src/darkunistd.h
@ -1,3 +1,4 @@
 #ifdef _WIN32
 #ifndef _UNISTD_H
 #define _UNISTD_H    1
@ -6,12 +7,13 @@
 *  Please add functionality as needed
 */
-#include <stdlib.h>
+#include <Winsock2.h>
 #include <direct.h> /* for _getcwd() and _chdir() */
 #include <getopt.h>
 #include <io.h>
 #include <process.h> /* for getpid() and the exec..() family */
-#include <direct.h> /* for _getcwd() and _chdir() */
+#include <stdlib.h>
 #include "getopt.h" /* getopt at: https://gist.github.com/ashelly/7776712 */
 #define srandom srand
 #define random rand
@ -19,7 +21,7 @@
 These may be OR'd together.  */
 #define R_OK    4       /* Test for read permission.  */
 #define W_OK    2       /* Test for write permission.  */
-//#define   X_OK    1       /* execute permission - unsupported in windows*/
+#define X_OK R_OK /* execute permission - unsupported in Windows, \
+                     use R_OK instead. */
 #define F_OK    0       /* Test for existence.  */
 #define access _access
@ -48,5 +50,7 @@ These may be OR'd together.  */
 //typedef unsigned __int16  uint16_t;
 //typedef unsigned __int32  uint32_t;
 //typedef unsigned __int64  uint64_t;
-
+#endif /* _UNISTD_H  */
-#endif /* unistd.h  */
+#else
 #include <unistd.h>
 #endif /* _WIN32  */
--- a/src/data.c
+++ b/src/data.c
@ -41,7 +41,7 @@ char **get_random_paths_indexes(char **paths, int n, int m, int *indexes)
 char **get_random_paths(char **paths, int n, int m)
 {
-    char **random_paths = calloc(n, sizeof(char*));
+    char** random_paths = (char**)calloc(n, sizeof(char*));
    int i;
    pthread_mutex_lock(&mutex);
    //printf("n = %d \n", n);
@ -60,7 +60,7 @@ char **get_random_paths(char **paths, int n, int m)
 char **find_replace_paths(char **paths, int n, char *find, char *replace)
 {
-    char **replace_paths = calloc(n, sizeof(char*));
+    char** replace_paths = (char**)calloc(n, sizeof(char*));
    int i;
    for(i = 0; i < n; ++i){
        char replaced[4096];
@ -75,7 +75,7 @@ matrix load_image_paths_gray(char **paths, int n, int w, int h)
    int i;
    matrix X;
    X.rows = n;
-    X.vals = calloc(X.rows, sizeof(float*));
+    X.vals = (float**)calloc(X.rows, sizeof(float*));
    X.cols = 0;
    for(i = 0; i < n; ++i){
@ -96,7 +96,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
    int i;
    matrix X;
    X.rows = n;
-    X.vals = calloc(X.rows, sizeof(float*));
+    X.vals = (float**)calloc(X.rows, sizeof(float*));
    X.cols = 0;
    for(i = 0; i < n; ++i){
@ -112,7 +112,7 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int
    int i;
    matrix X;
    X.rows = n;
-    X.vals = calloc(X.rows, sizeof(float*));
+    X.vals = (float**)calloc(X.rows, sizeof(float*));
    X.cols = 0;
    for(i = 0; i < n; ++i){
@ -139,7 +139,7 @@ extern int check_mistakes;
 box_label *read_boxes(char *filename, int *n)
 {
-    box_label *boxes = calloc(1, sizeof(box_label));
+    box_label* boxes = (box_label*)calloc(1, sizeof(box_label));
    FILE *file = fopen(filename, "r");
    if (!file) {
        printf("Can't open label file. (This can be normal only if you use MSCOCO): %s \n", filename);
@ -158,7 +158,7 @@ box_label *read_boxes(char *filename, int *n)
    int id;
    int count = 0;
    while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
-        boxes = realloc(boxes, (count+1)*sizeof(box_label));
+        boxes = (box_label*)realloc(boxes, (count + 1) * sizeof(box_label));
        boxes[count].id = id;
        boxes[count].x = x;
        boxes[count].y = y;
@ -300,7 +300,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
    free(boxes);
 }
-void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
+void fill_truth_detection(const char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
    int small_object, int net_w, int net_h)
 {
    char labelpath[4096];
@ -391,7 +391,6 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
    free(boxes);
 }
 #define NUMCHARS 37
 void print_letters(float *pred, int n)
 {
@ -565,7 +564,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
    d.shallow = 0;
    d.X.rows = n;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*3;
@ -619,7 +618,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
    d.shallow = 0;
    d.X.rows = n;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*6;
    int k = 2*(classes);
@ -628,7 +627,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
        image im1 = load_image_color(paths[i*2],   w, h);
        image im2 = load_image_color(paths[i*2+1], w, h);
-        d.X.vals[i] = calloc(d.X.cols, sizeof(float));
+        d.X.vals[i] = (float*)calloc(d.X.cols, sizeof(float));
        memcpy(d.X.vals[i],         im1.data, h*w*3*sizeof(float));
        memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float));
@ -690,7 +689,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
    d.h = h;
    d.X.rows = 1;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*3;
    int k = (4+classes)*30;
@ -729,12 +728,12 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
 }
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/imgproc/imgproc_c.h>
-#include "opencv2/core/version.hpp"
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
-#include "opencv2/imgcodecs/imgcodecs_c.h"
+#include <opencv2/imgcodecs/imgcodecs_c.h>
 #endif
 #include "http_stream.h"
@ -748,7 +747,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
    d.shallow = 0;
    d.X.rows = n;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*c;
    d.y = make_matrix(n, 5*boxes);
@ -817,7 +816,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
    d.shallow = 0;
    d.X.rows = n;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*c;
    d.y = make_matrix(n, 5 * boxes);
@ -903,7 +902,7 @@ void *load_thread(void *ptr)
 pthread_t load_data_in_thread(load_args args)
 {
    pthread_t thread;
-    struct load_args *ptr = calloc(1, sizeof(struct load_args));
+    struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
    *ptr = args;
    if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed");
    return thread;
@ -918,8 +917,8 @@ void *load_threads(void *ptr)
    data *out = args.d;
    int total = args.n;
    free(ptr);
-    data *buffers = calloc(args.threads, sizeof(data));
+    data* buffers = (data*)calloc(args.threads, sizeof(data));
-    pthread_t *threads = calloc(args.threads, sizeof(pthread_t));
+    pthread_t* threads = (pthread_t*)calloc(args.threads, sizeof(pthread_t));
    for(i = 0; i < args.threads; ++i){
        args.d = buffers + i;
        args.n = (i+1) * total/args.threads - i * total/args.threads;
@ -942,7 +941,7 @@ void *load_threads(void *ptr)
 pthread_t load_data(load_args args)
 {
    pthread_t thread;
-    struct load_args *ptr = calloc(1, sizeof(struct load_args));
+    struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
    *ptr = args;
    if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed");
    return thread;
@ -996,11 +995,11 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
    int i;
    d.X.rows = n;
-    d.X.vals = calloc(n, sizeof(float*));
+    d.X.vals = (float**)calloc(n, sizeof(float*));
    d.X.cols = w*h*3;
    d.y.rows = n;
-    d.y.vals = calloc(n, sizeof(float*));
+    d.y.vals = (float**)calloc(n, sizeof(float*));
    d.y.cols = w*scale * h*scale * 3;
    for(i = 0; i < n; ++i){
@ -1048,7 +1047,7 @@ matrix concat_matrix(matrix m1, matrix m2)
    matrix m;
    m.cols = m1.cols;
    m.rows = m1.rows+m2.rows;
-    m.vals = calloc(m1.rows + m2.rows, sizeof(float*));
+    m.vals = (float**)calloc(m1.rows + m2.rows, sizeof(float*));
    for(i = 0; i < m1.rows; ++i){
        m.vals[count++] = m1.vals[i];
    }
@ -1072,9 +1071,9 @@ data concat_datas(data *d, int n)
    int i;
    data out = {0};
    for(i = 0; i < n; ++i){
-        data new = concat_data(d[i], out);
+        data newdata = concat_data(d[i], out);
        free_data(out);
-        out = new;
+        out = newdata;
    }
    return out;
 }
@ -1300,8 +1299,8 @@ data get_random_data(data d, int num)
    r.X.cols = d.X.cols;
    r.y.cols = d.y.cols;
-    r.X.vals = calloc(num, sizeof(float *));
+    r.X.vals = (float**)calloc(num, sizeof(float*));
-    r.y.vals = calloc(num, sizeof(float *));
+    r.y.vals = (float**)calloc(num, sizeof(float*));
    int i;
    for(i = 0; i < num; ++i){
@ -1314,7 +1313,7 @@ data get_random_data(data d, int num)
 data *split_data(data d, int part, int total)
 {
-    data *split = calloc(2, sizeof(data));
+    data* split = (data*)calloc(2, sizeof(data));
    int i;
    int start = part*d.X.rows/total;
    int end = (part+1)*d.X.rows/total;
@ -1327,10 +1326,10 @@ data *split_data(data d, int part, int total)
    train.X.cols = test.X.cols = d.X.cols;
    train.y.cols = test.y.cols = d.y.cols;
-    train.X.vals = calloc(train.X.rows, sizeof(float*));
+    train.X.vals = (float**)calloc(train.X.rows, sizeof(float*));
-    test.X.vals = calloc(test.X.rows, sizeof(float*));
+    test.X.vals = (float**)calloc(test.X.rows, sizeof(float*));
-    train.y.vals = calloc(train.y.rows, sizeof(float*));
+    train.y.vals = (float**)calloc(train.y.rows, sizeof(float*));
-    test.y.vals = calloc(test.y.rows, sizeof(float*));
+    test.y.vals = (float**)calloc(test.y.rows, sizeof(float*));
    for(i = 0; i < start; ++i){
        train.X.vals[i] = d.X.vals[i];
--- a/src/data.h
+++ b/src/data.h
@ -2,14 +2,14 @@
 #define DATA_H
 #include <pthread.h>
-#if defined(_MSC_VER) && _MSC_VER < 1900
+#include "darknet.h"
 	#define inline __inline
 #endif
 #include "darknet.h"
 #include "matrix.h"
 #include "list.h"
 #include "image.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "tree.h"
 static inline float distance_from_edge(int x, int max)
@ -115,5 +115,8 @@ data *split_data(data d, int part, int total);
 data concat_data(data d1, data d2);
 data concat_datas(data *d, int n);
 void fill_truth(char *path, char **labels, int k, float *truth);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/deconvolutional_kernels.cu
+++ b/src/deconvolutional_kernels.cu
@ -2,7 +2,6 @@
 #include "curand.h"
 #include "cublas_v2.h"
 extern "C" {
 #include "convolutional_layer.h"
 #include "deconvolutional_layer.h"
 #include "gemm.h"
@ -11,7 +10,6 @@ extern "C" {
 #include "col2im.h"
 #include "utils.h"
 #include "cuda.h"
 }
 extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
 {
@ -95,7 +93,7 @@ extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
    cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
 }
-extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
+extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay)
 {
    int size = layer.size*layer.size*layer.c*layer.n;
--- a/src/deconvolutional_layer.c
+++ b/src/deconvolutional_layer.c
@ -46,7 +46,7 @@ image get_deconvolutional_delta(deconvolutional_layer l)
 deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
 {
    int i;
-    deconvolutional_layer l = {0};
+    deconvolutional_layer l = { (LAYER_TYPE)0 };
    l.type = DECONVOLUTIONAL;
    l.h = h;
@ -57,11 +57,11 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
    l.stride = stride;
    l.size = size;
-    l.weights = calloc(c*n*size*size, sizeof(float));
+    l.weights = (float*)calloc(c * n * size * size, sizeof(float));
-    l.weight_updates = calloc(c*n*size*size, sizeof(float));
+    l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
-    l.biases = calloc(n, sizeof(float));
+    l.biases = (float*)calloc(n, sizeof(float));
-    l.bias_updates = calloc(n, sizeof(float));
+    l.bias_updates = (float*)calloc(n, sizeof(float));
    float scale = 1./sqrt(size*size*c);
    for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
    for(i = 0; i < n; ++i){
@ -76,9 +76,9 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
    l.outputs = l.out_w * l.out_h * l.out_c;
    l.inputs = l.w * l.h * l.c;
-    l.col_image = calloc(h*w*size*size*n, sizeof(float));
+    l.col_image = (float*)calloc(h * w * size * size * n, sizeof(float));
-    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
+    l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
-    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));
+    l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
    l.forward = forward_deconvolutional_layer;
    l.backward = backward_deconvolutional_layer;
@ -110,11 +110,11 @@ void resize_deconvolutional_layer(deconvolutional_layer *l, int h, int w)
    int out_h = deconvolutional_out_height(*l);
    int out_w = deconvolutional_out_width(*l);
-    l->col_image = realloc(l->col_image,
+    l->col_image = (float*)realloc(l->col_image,
                                out_h*out_w*l->size*l->size*l->c*sizeof(float));
-    l->output = realloc(l->output,
+    l->output = (float*)realloc(l->output,
                                l->batch*out_h * out_w * l->n*sizeof(float));
-    l->delta  = realloc(l->delta,
+    l->delta = (float*)realloc(l->delta,
                                l->batch*out_h * out_w * l->n*sizeof(float));
    #ifdef GPU
    cuda_free(l->col_image_gpu);
@ -191,7 +191,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
    }
 }
-void update_deconvolutional_layer(deconvolutional_layer l, float learning_rate, float momentum, float decay)
+void update_deconvolutional_layer(deconvolutional_layer l, int skip, float learning_rate, float momentum, float decay)
 {
    int size = l.size*l.size*l.c*l.n;
    axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1);
--- a/src/deconvolutional_layer.h
+++ b/src/deconvolutional_layer.h
@ -9,10 +9,13 @@
 typedef layer deconvolutional_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 #ifdef GPU
 void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
 void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
-void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
+void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay);
 void push_deconvolutional_layer(deconvolutional_layer layer);
 void pull_deconvolutional_layer(deconvolutional_layer layer);
 #endif
@ -20,7 +23,7 @@ void pull_deconvolutional_layer(deconvolutional_layer layer);
 deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation);
 void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
 void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state);
-void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
+void update_deconvolutional_layer(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay);
 void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state);
 image get_deconvolutional_image(deconvolutional_layer layer);
@ -30,5 +33,8 @@ image get_deconvolutional_filter(deconvolutional_layer layer, int i);
 int deconvolutional_out_height(deconvolutional_layer layer);
 int deconvolutional_out_width(deconvolutional_layer layer);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/demo.c
+++ b/src/demo.c
@ -9,20 +9,18 @@
 #include "demo.h"
 #ifdef WIN32
 #include <time.h>
 #include <winsock.h>
 #include "gettimeofday.h"
 #else
 #include <sys/time.h>
 #endif
 #define FRAMES 3
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/imgproc/imgproc_c.h>
-#include "opencv2/core/version.hpp"
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
 #include "http_stream.h"
 image get_image_from_stream(CvCapture *cap);
@ -45,10 +43,10 @@ static int demo_ext_output = 0;
 static long long int frame_id = 0;
 static int demo_json_port = -1;
-static float *predictions[FRAMES];
+static float* predictions[NFRAMES];
 static int demo_index = 0;
-static image images[FRAMES];
+static image images[NFRAMES];
-static IplImage* ipl_images[FRAMES];
+static IplImage* ipl_images[NFRAMES];
 static float *avg;
 void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output);
@ -77,7 +75,7 @@ void *fetch_in_thread(void *ptr)
        //error("Stream closed.");
        printf("Stream closed.\n");
        flag_exit = 1;
-        return EXIT_FAILURE;
+        exit(EXIT_FAILURE);
    }
    //in_s = resize_image(in, net.w, net.h);
@ -91,14 +89,14 @@ void *detect_in_thread(void *ptr)
    float *prediction = network_predict(net, X);
    memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
-    mean_arrays(predictions, FRAMES, l.outputs, avg);
+    mean_arrays(predictions, NFRAMES, l.outputs, avg);
    l.output = avg;
    free_image(det_s);
    ipl_images[demo_index] = det_img;
-    det_img = ipl_images[(demo_index + FRAMES / 2 + 1) % FRAMES];
+    det_img = ipl_images[(demo_index + NFRAMES / 2 + 1) % NFRAMES];
-    demo_index = (demo_index + 1) % FRAMES;
+    demo_index = (demo_index + 1) % NFRAMES;
    if (letter_box)
        dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box
@ -110,11 +108,11 @@ void *detect_in_thread(void *ptr)
 double get_wall_time()
 {
-    struct timeval time;
+    struct timeval walltime;
-    if (gettimeofday(&time,NULL)){
+    if (gettimeofday(&walltime, NULL)) {
        return 0;
    }
-    return (double)time.tv_sec + (double)time.tv_usec * .000001;
+    return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001;
 }
 void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
@ -161,8 +159,8 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
    int j;
    avg = (float *) calloc(l.outputs, sizeof(float));
-    for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
+    for(j = 0; j < NFRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
-    for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3);
+    for(j = 0; j < NFRAMES; ++j) images[j] = make_image(1,1,3);
    if (l.classes != demo_classes) {
        printf("Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes);
@ -185,7 +183,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
    det_img = in_img;
    det_s = in_s;
-    for(j = 0; j < FRAMES/2; ++j){
+    for (j = 0; j < NFRAMES / 2; ++j) {
        fetch_in_thread(0);
        detect_in_thread(0);
        det_img = in_img;
@ -318,10 +316,10 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
    free_image(in_s);
    free(avg);
-    for (j = 0; j < FRAMES; ++j) free(predictions[j]);
+    for (j = 0; j < NFRAMES; ++j) free(predictions[j]);
-    for (j = 0; j < FRAMES; ++j) free_image(images[j]);
+    for (j = 0; j < NFRAMES; ++j) free_image(images[j]);
-    free_ptrs(names, net.layers[net.n - 1].classes);
+    free_ptrs((void **)names, net.layers[net.n - 1].classes);
    int i;
    const int nsize = 8;
@ -342,4 +340,3 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
    fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
 }
 #endif
--- a/src/demo.h
+++ b/src/demo.h
@ -1,8 +1,14 @@
-#ifndef DEMO
+#ifndef DEMO_H
-#define DEMO
+#define DEMO_H
 #include "image.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
    int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int json_port, int dont_show, int ext_output);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@ -12,7 +12,7 @@
 detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore)
 {
-    detection_layer l = {0};
+    detection_layer l = { (LAYER_TYPE)0 };
    l.type = DETECTION;
    l.n = n;
@ -25,11 +25,11 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
    l.w = side;
    l.h = side;
    assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
-    l.cost = calloc(1, sizeof(float));
+    l.cost = (float*)calloc(1, sizeof(float));
    l.outputs = l.inputs;
    l.truths = l.side*l.side*(1+l.coords+l.classes);
-    l.output = calloc(batch*l.outputs, sizeof(float));
+    l.output = (float*)calloc(batch * l.outputs, sizeof(float));
-    l.delta = calloc(batch*l.outputs, sizeof(float));
+    l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
    l.forward = forward_detection_layer;
    l.backward = backward_detection_layer;
@ -182,7 +182,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
        }
        if(0){
-            float *costs = calloc(l.batch*locations*l.n, sizeof(float));
+            float* costs = (float*)calloc(l.batch * locations * l.n, sizeof(float));
            for (b = 0; b < l.batch; ++b) {
                int index = b*l.inputs;
                for (i = 0; i < locations; ++i) {
@ -259,11 +259,11 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state)
        return;
    }
-    float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
+    float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
    float *truth_cpu = 0;
    if(state.truth){
        int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes);
-        truth_cpu = calloc(num_truth, sizeof(float));
+        truth_cpu = (float*)calloc(num_truth, sizeof(float));
        cuda_pull_array(state.truth, truth_cpu, num_truth);
    }
    cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);
--- a/src/detection_layer.h
+++ b/src/detection_layer.h
@ -6,6 +6,9 @@
 typedef layer detection_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
 void forward_detection_layer(const detection_layer l, network_state state);
 void backward_detection_layer(const detection_layer l, network_state state);
@ -17,4 +20,7 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state);
 void backward_detection_layer_gpu(detection_layer l, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/detector.c
+++ b/src/detector.c
@ -9,27 +9,33 @@
 #include "option_list.h"
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
-#include "opencv2/core/core_c.h"
+#include <opencv2/core/core_c.h>
 //#include "opencv2/core/core.hpp"
-#include "opencv2/core/version.hpp"
+#include <opencv2/core/version.hpp>
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/imgproc/imgproc_c.h>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION)
 #ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib")
 #endif    // USE_CMAKE_LIBS
 #else
 #define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)"" CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)
 #ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib")
 #pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib")
 #pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib")
 #endif    // USE_CMAKE_LIBS
 #endif
 IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
 void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
    float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port);
 #endif // OPENCV
 #ifndef CV_RGB
 #define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
 #endif    // OPENCV
@ -81,7 +87,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
-    network *nets = calloc(ngpus, sizeof(network));
+    network* nets = (network*)calloc(ngpus, sizeof(network));
    srand(time(0));
    int seed = rand();
@ -410,8 +416,8 @@ void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int
        if (ymax > h) ymax = h;
        for (j = 0; j < classes; ++j) {
-            int class = j;
+            int myclass = j;
-            if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[class],
+            if (dets[i].prob[myclass]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[myclass],
                xmin, ymin, xmax, ymax);
        }
    }
@ -465,7 +471,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
    }
    else {
        if (!outfile) outfile = "comp4_det_test_";
-        fps = calloc(classes, sizeof(FILE *));
+        fps = (FILE**)calloc(classes, sizeof(FILE*));
        for (j = 0; j < classes; ++j) {
            snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
            fps[j] = fopen(buff, "w");
@ -482,11 +488,11 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
    int nthreads = 4;
    if (m < 4) nthreads = m;
-    image *val = calloc(nthreads, sizeof(image));
+    image* val = (image*)calloc(nthreads, sizeof(image));
-    image *val_resized = calloc(nthreads, sizeof(image));
+    image* val_resized = (image*)calloc(nthreads, sizeof(image));
-    image *buf = calloc(nthreads, sizeof(image));
+    image* buf = (image*)calloc(nthreads, sizeof(image));
-    image *buf_resized = calloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
-    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
    load_args args = { 0 };
    args.w = net.w;
@ -702,11 +708,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
    int nthreads = 4;
    if (m < 4) nthreads = m;
-    image *val = calloc(nthreads, sizeof(image));
+    image* val = (image*)calloc(nthreads, sizeof(image));
-    image *val_resized = calloc(nthreads, sizeof(image));
+    image* val_resized = (image*)calloc(nthreads, sizeof(image));
-    image *buf = calloc(nthreads, sizeof(image));
+    image* buf = (image*)calloc(nthreads, sizeof(image));
-    image *buf_resized = calloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
-    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
    load_args args = { 0 };
    args.w = net.w;
@ -720,11 +726,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
    int tp_for_thresh = 0;
    int fp_for_thresh = 0;
-    box_prob *detections = calloc(1, sizeof(box_prob));
+    box_prob* detections = (box_prob*)calloc(1, sizeof(box_prob));
    int detections_count = 0;
    int unique_truth_count = 0;
-    int *truth_classes_count = calloc(classes, sizeof(int));
+    int* truth_classes_count = (int*)calloc(classes, sizeof(int));
    for (t = 0; t < nthreads; ++t) {
        args.path = paths[i + t];
@ -798,7 +804,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
                    float prob = dets[i].prob[class_id];
                    if (prob > 0) {
                        detections_count++;
-                        detections = realloc(detections, detections_count * sizeof(box_prob));
+                        detections = (box_prob*)realloc(detections, detections_count * sizeof(box_prob));
                        detections[detections_count - 1].b = dets[i].bbox;
                        detections[detections_count - 1].p = prob;
                        detections[detections_count - 1].image_index = image_index;
@ -890,14 +896,14 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
    } pr_t;
    // for PR-curve
-    pr_t **pr = calloc(classes, sizeof(pr_t*));
+    pr_t** pr = (pr_t**)calloc(classes, sizeof(pr_t*));
    for (i = 0; i < classes; ++i) {
-        pr[i] = calloc(detections_count, sizeof(pr_t));
+        pr[i] = (pr_t*)calloc(detections_count, sizeof(pr_t));
    }
    printf("\n detections_count = %d, unique_truth_count = %d  \n", detections_count, unique_truth_count);
-    int *truth_flags = calloc(unique_truth_count, sizeof(int));
+    int* truth_flags = (int*)calloc(unique_truth_count, sizeof(int));
    int rank;
    for (rank = 0; rank < detections_count; ++rank) {
@ -993,7 +999,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
    if (reinforcement_fd != NULL) fclose(reinforcement_fd);
    // free memory
-    free_ptrs(names, net.layers[net.n - 1].classes);
+    free_ptrs((void**)names, net.layers[net.n - 1].classes);
    free_list_contents_kvp(options);
    free_list(options);
@ -1043,7 +1049,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
    }
    //float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 };
-    float *rel_width_height_array = calloc(1000, sizeof(float));
+    float* rel_width_height_array = (float*)calloc(1000, sizeof(float));
    list *options = read_data_cfg(datacfg);
@ -1079,7 +1085,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
                if (check_mistakes) getchar();
            }
            number_of_boxes++;
-            rel_width_height_array = realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
+            rel_width_height_array = (float*)realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
            rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width;
            rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height;
            printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes);
@ -1104,7 +1110,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
    // K-means
    anchors_data = do_kmeans(boxes_data, num_of_clusters);
-    qsort(anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), anchors_data_comparator);
+    qsort((void*)anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), (__compar_fn_t)anchors_data_comparator);
    //gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66
    //float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 };
@ -1285,8 +1291,8 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
        layer l = net.layers[net.n - 1];
        //box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
-        //float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
+        //float **probs = calloc(l.w*l.h*l.n, sizeof(float*));
-        //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
+        //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
        float *X = sized.data;
@ -1365,7 +1371,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
    }
    // free memory
-    free_ptrs(names, net.layers[net.n - 1].classes);
+    free_ptrs((void**)names, net.layers[net.n - 1].classes);
    free_list_contents_kvp(options);
    free_list(options);
@ -1421,7 +1427,7 @@ void run_detector(int argc, char **argv)
        for (i = 0; i < len; ++i) {
            if (gpu_list[i] == ',') ++ngpus;
        }
-        gpus = calloc(ngpus, sizeof(int));
+        gpus = (int*)calloc(ngpus, sizeof(int));
        for (i = 0; i < ngpus; ++i) {
            gpus[i] = atoi(gpu_list);
            gpu_list = strchr(gpu_list, ',') + 1;
--- a/src/dice.c
+++ b/src/dice.c
@ -9,7 +9,7 @@ void train_dice(char *cfgfile, char *weightfile)
    srand(time(0));
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
    printf("%s\n", base);
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
--- a/src/dropout_layer.c
+++ b/src/dropout_layer.c
@ -6,13 +6,13 @@
 dropout_layer make_dropout_layer(int batch, int inputs, float probability)
 {
-    dropout_layer l = {0};
+    dropout_layer l = { (LAYER_TYPE)0 };
    l.type = DROPOUT;
    l.probability = probability;
    l.inputs = inputs;
    l.outputs = inputs;
    l.batch = batch;
-    l.rand = calloc(inputs*batch, sizeof(float));
+    l.rand = (float*)calloc(inputs * batch, sizeof(float));
    l.scale = 1./(1.-probability);
    l.forward = forward_dropout_layer;
    l.backward = backward_dropout_layer;
@ -27,7 +27,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
 void resize_dropout_layer(dropout_layer *l, int inputs)
 {
-    l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float));
+    l->rand = (float*)realloc(l->rand, l->inputs * l->batch * sizeof(float));
    #ifdef GPU
    cuda_free(l->rand_gpu);
--- a/src/dropout_layer.h
+++ b/src/dropout_layer.h
@ -6,6 +6,9 @@
 typedef layer dropout_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 dropout_layer make_dropout_layer(int batch, int inputs, float probability);
 void forward_dropout_layer(dropout_layer l, network_state state);
@ -16,5 +19,8 @@ void resize_dropout_layer(dropout_layer *l, int inputs);
 void forward_dropout_layer_gpu(dropout_layer l, network_state state);
 void backward_dropout_layer_gpu(dropout_layer l, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/dropout_layer_kernels.cu
+++ b/src/dropout_layer_kernels.cu
@ -2,11 +2,9 @@
 #include "curand.h"
 #include "cublas_v2.h"
 extern "C" {
 #include "dropout_layer.h"
 #include "cuda.h"
 #include "utils.h"
 }
 __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale)
 {
--- a/src/gemm.c
+++ b/src/gemm.c
@ -7,7 +7,10 @@
 #include <math.h>
 #include <float.h>
 #include <string.h>
-
+#include <stdint.h>
 #ifdef _WIN32
 #include <intrin.h>
 #endif
 #if defined(_OPENMP)
 #include <omp.h>
 #endif
@ -37,7 +40,7 @@ void gemm_bin(int M, int N, int K, float ALPHA,
 float *random_matrix(int rows, int cols)
 {
    int i;
-    float *m = calloc(rows*cols, sizeof(float));
+    float* m = (float*)calloc(rows * cols, sizeof(float));
    for(i = 0; i < rows*cols; ++i){
        m[i] = (float)rand()/RAND_MAX;
    }
@ -83,7 +86,6 @@ void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
 // XNOR bitwise GEMM for binary neural network
 //--------------------------------------------
 #include <stdint.h>
 static inline unsigned char xnor(unsigned char a, unsigned char b) {
    //return a == b;
@ -318,6 +320,7 @@ void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb)
    }
 }
 #ifndef GPU
 /* Reverse the bit order of one byte (bit 0 <-> bit 7, bit 1 <-> bit 6, ...). */
 uint8_t reverse_8_bit(uint8_t a) {
     /* Two multiplies fan the byte out; each mask keeps four of the copied bits. */
     const unsigned long spread_a = (a * 0x0802LU) & 0x22110LU;
     const unsigned long spread_b = (a * 0x8020LU) & 0x88440LU;
     /* The last multiply gathers the kept bits so that bits 16..23 hold the result. */
     const unsigned long gathered = (spread_a | spread_b) * 0x10101LU;
     return (uint8_t)(gathered >> 16);
 }
@ -465,6 +468,9 @@ void transpose_bin(char *A, char *B, const int n, const int m,
 }
 */
 #else
 extern void transpose_32x32_bits_reversed_diagonale(uint32_t* A, uint32_t* B, int m, int n);
 #endif
 // transpose by 32-bit
 void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
@ -483,7 +489,7 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
            //transpose_32x32_bits_my(&A[a_index/32], &B[b_index/32], lda/32, ldb/32);
        }
        for (; j < m; ++j) {
-            if (get_bit(A, i*lda + j)) set_bit(B, j*ldb + i);
+            if (get_bit((const unsigned char* const)A, i * lda + j)) set_bit((unsigned char* const)B, j * ldb + i);
        }
    }
 }
@ -703,7 +709,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
    else {
        for (i = 0; i < M; ++i) {
            for (k = 0; k < K; ++k) {
-                register float A_PART = ALPHA*A[i*lda + k];
+                float A_PART = ALPHA * A[i * lda + k];
                for (j = 0; j < N; ++j) {
                    C[i*ldc + j] += A_PART*B[k*ldb + j];
                }
@ -730,9 +736,6 @@ void gemm_nn(int M, int N, int K, float ALPHA,
 }
 #define TILE_M 4    // 4 ops
 #define TILE_N 16   // AVX2 = 2 ops * 8 floats
 #define TILE_K 16   // loop
 void gemm_nn_fast(int M, int N, int K, float ALPHA,
    float *A, int lda,
@ -1286,16 +1289,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
 }
 /*
  * Read one value from `im`, addressed as im[col + width*(row + height*channel)],
  * after shifting (row, col) by `pad`. Coordinates that fall outside
  * [0, height) x [0, width) after the shift yield 0. `channels` is not read.
  */
 static inline float im2col_get_pixel(float *im, int height, int width, int channels,
     int row, int col, int channel, int pad)
 {
     /* Map padded coordinates back onto the unpadded image. */
     const int y = row - pad;
     const int x = col - pad;
     const int inside = (y >= 0) && (x >= 0) && (y < height) && (x < width);

     if (!inside) {
         return 0;
     }
     return im[x + width * (y + height * channel)];
 }
 //From Berkeley Vision's Caffe!
 //https://github.com/BVLC/caffe/blob/master/LICENSE
@ -1645,7 +1639,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    __m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS);
                    uint16_t mask = _mm256_movemask_ps(result256); // (val > 0) ? 0 : 1
-                    uint16_t *dst_ptr = &((unsigned char*)data_col)[col_index / 8];
+                    // Keep the byte offset of the line this replaces: col_index / 8 bytes into data_col.
+                    // Indexing data_col as uint16_t[] instead would double that offset.
+                    uint16_t* dst_ptr = (uint16_t*)&((unsigned char*)data_col)[col_index / 8];
                    *dst_ptr |= (mask << (col_index % 8));
                }
@ -1657,7 +1651,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
                    float val = data_im[im_col + width*(im_row + height*c_im)];
-                    if(val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                }
            }
@ -1671,7 +1665,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                    float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                }
            }
@ -1685,7 +1679,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                    float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                }
            }
@ -1699,7 +1693,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                    float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                }
            }
@ -1713,7 +1707,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                    float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                }
            }
        }
@ -1952,7 +1946,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
    int i, j, k;
    for (i = 0; i < M; ++i) {
        for (k = 0; k < K; ++k) {
-            register float A_PART = ALPHA*A[i*lda + k];
+            float A_PART = ALPHA * A[i * lda + k];
            for (j = 0; j < N; ++j) {
                C[i*ldc + j] += A_PART*B[k*ldb + j];
            }
@ -2239,7 +2233,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    int col_index = c * new_ldb + h * width_col + w;
                    float val = data_im[im_col + width*(im_row + height*c_im)];
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                }
                for (; w < width_col - pad; ++w) {
@ -2250,7 +2244,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
                    float val = data_im[im_col + width*(im_row + height*c_im)];
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                }
            }
@ -2264,7 +2258,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                    float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                }
            }
@ -2278,7 +2272,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                    float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                }
            }
@ -2292,7 +2286,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                    float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                }
            }
@ -2306,7 +2300,7 @@ void im2col_cpu_custom_bin(float* data_im,
                    //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                    float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                }
            }
        }
@ -2346,7 +2340,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size)
    memset(dst, 0, dst_size);
    size_t i;
-    char *byte_arr = calloc(size, sizeof(char));
+    char* byte_arr = (char*)calloc(size, sizeof(char));
    for (i = 0; i < size; ++i) {
        if (src[i] > 0) byte_arr[i] = 1;
    }
@ -2578,7 +2572,7 @@ void gemm_nt(int M, int N, int K, float ALPHA,
    int i,j,k;
    for(i = 0; i < M; ++i){
        for(j = 0; j < N; ++j){
-            register float sum = 0;
+            float sum = 0;
            for(k = 0; k < K; ++k){
                sum += ALPHA*A[i*lda+k]*B[j*ldb + k];
            }
@ -2595,7 +2589,7 @@ void gemm_tn(int M, int N, int K, float ALPHA,
    int i,j,k;
    for(i = 0; i < M; ++i){
        for(k = 0; k < K; ++k){
-            register float A_PART = ALPHA*A[k*lda+i];
+            float A_PART = ALPHA * A[k * lda + i];
            for(j = 0; j < N; ++j){
                C[i*ldc+j] += A_PART*B[k*ldb+j];
            }
@ -2611,7 +2605,7 @@ void gemm_tt(int M, int N, int K, float ALPHA,
    int i,j,k;
    for(i = 0; i < M; ++i){
        for(j = 0; j < N; ++j){
-            register float sum = 0;
+            float sum = 0;
            for(k = 0; k < K; ++k){
                sum += ALPHA*A[i+k*lda]*B[k+j*ldb];
            }
@ -2668,9 +2662,9 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
        float *C_gpu, int ldc)
 {
    cublasHandle_t handle = blas_handle();
-    cudaError_t stream_status = cublasSetStream(handle, get_cuda_stream());
+    cudaError_t stream_status = (cudaError_t)cublasSetStream(handle, get_cuda_stream());
    CHECK_CUDA(stream_status);
-    cudaError_t status = cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N),
+    cudaError_t status = (cudaError_t)cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N),
            (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
    CHECK_CUDA(status);
 }
--- a/src/gemm.h
+++ b/src/gemm.h
@ -3,6 +3,9 @@
 #include "activations.h"
 #include <stdint.h>
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
    float *weights, float *input, float *output, float *mean);
@ -56,6 +59,7 @@ void im2col_cpu_custom_transpose(float* data_im,
 void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a);
 LIB_API void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n);
 void gemm_bin(int M, int N, int K, float ALPHA,
        char  *A, int lda,
@ -109,4 +113,7 @@ void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
        float BETA,
        float *C, int ldc);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/getopt.c
+++ b/src/getopt.c
--- a/src/getopt.h
+++ b/src/getopt.h
@ -1,133 +1,228 @@
-/* Declarations for getopt.
+#ifdef _WIN32
-   Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
+#ifndef __GETOPT_H__
 /**
 * DISCLAIMER
 * This file is part of the mingw-w64 runtime package.
 *
 * The mingw-w64 runtime package and its code is distributed in the hope that it
 * will be useful but WITHOUT ANY WARRANTY.  ALL WARRANTIES, EXPRESSED OR
 * IMPLIED ARE HEREBY DISCLAIMED.  This includes but is not limited to
 * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */
 /*
 * Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F39502-99-1-0512.
 */
 /*-
 * Copyright (c) 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Dieter Baron and Thomas Klausner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
-This file is part of the GNU C Library.  Its master source is NOT part of
+#define __GETOPT_H__
 the C library, however.  The master source lives in /gd/gnu/lib.
-The GNU C Library is free software; you can redistribute it and/or
+/* All the headers include this file. */
-modify it under the terms of the GNU Library General Public License as
+#include <crtdefs.h>
-published by the Free Software Foundation; either version 2 of the
+#include <errno.h>
-License, or (at your option) any later version.
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #define WIN32_LEAN_AND_MEAN
 #include <Windows.h>
-The GNU C Library is distributed in the hope that it will be useful,
+#ifdef __cplusplus
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Library General Public License for more details.
 You should have received a copy of the GNU Library General Public
 License along with the GNU C Library; see the file COPYING.LIB.  If
 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
 Cambridge, MA 02139, USA.  */
 #ifndef _GETOPT_H
 #define _GETOPT_H 1
 #ifdef	__cplusplus
 extern "C" {
 #endif
-/* For communication from `getopt' to the caller.
+#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
   When `getopt' finds an option that takes an argument,
   the argument value is returned here.
   Also, when `ordering' is RETURN_IN_ORDER,
   each non-option ARGV-element is returned here.  */
-extern char *optarg;
+//extern int optind;		/* index of first non-option in argv      */
 //extern int optopt;		/* single option character, as parsed     */
 //extern int opterr;		/* flag to enable built-in diagnostics... */
 //				/* (user may set to zero, to suppress)    */
 //
 //extern char *optarg;		/* pointer to argument of current option  */
-/* Index in ARGV of the next element to be scanned.
+#define PRINT_ERROR ((opterr) && (*options != ':'))
   This is used for communication to and from the caller
   and for communication between successive calls to `getopt'.
-   On entry to `getopt', zero means this is the first call; initialize.
+#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
 #define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
 #define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
-   When `getopt' returns EOF, this is the index of the first of the
+/* return values */
-   non-option elements that the caller should itself scan.
+#define BADCH (int)'?'
 #define BADARG ((*options == ':') ? (int)':' : (int)'?')
 #define INORDER (int)1
-   Otherwise, `optind' communicates from one call to the next
+#ifndef __CYGWIN__
-   how much of ARGV has been scanned so far.  */
+#define __progname __argv[0]
 extern int optind;
 /* Callers store zero here to inhibit the error message `getopt' prints
   for unrecognized options.  */
 extern int opterr;
 /* Set to an option character which was unrecognized.  */
 extern int optopt;
 /* Describe the long-named options requested by the application.
   The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
   of `struct option' terminated by an element containing a name which is
   zero.
   The field `has_arg' is:
   no_argument		(or 0) if the option does not take an argument,
   required_argument	(or 1) if the option requires an argument,
   optional_argument 	(or 2) if the option takes an optional argument.
   If the field `flag' is not NULL, it points to a variable that is set
   to the value given in the field `val' when the option is found, but
   left unchanged if the option is not found.
   To have a long-named option do something other than set an `int' to
   a compiled-in constant, such as set a value from `optarg', set the
   option's `flag' field to zero and its `val' field to a nonzero
   value (the equivalent single-letter option character, if there is
   one).  For long options that have a zero `flag' field, `getopt'
   returns the contents of the `val' field.  */
 struct option
 {
 #if defined (__STDC__) && __STDC__
  const char *name;
 #else
-  char *name;
+extern char __declspec(dllimport) * __progname;
 #endif
  /* has_arg can't be an enum because some compilers complain about
     type mismatches in all the code that assumes it is an int.  */
  int has_arg;
  int *flag;
  int val;
 };
-/* Names for the values of the `has_arg' field of `struct option'.  */
+#ifdef __CYGWIN__
 static char EMSG[] = "";
 #else
 #define EMSG ""
 #endif
-#define	no_argument		0
+static int getopt_internal(int, char* const*, const char*,
-#define required_argument	1
+    const struct option*, int*, int);
-#define optional_argument	2
+static int parse_long_options(char* const*, const char*,
    const struct option*, int*, int);
 static int gcd(int, int);
 static void permute_args(int, int, int, char* const*);
-#if defined (__STDC__) && __STDC__
+static char* place = EMSG; /* option letter processing */
 #ifdef __GNU_LIBRARY__
 /* Many other libraries have conflicting prototypes for getopt, with
   differences in the consts, in stdlib.h.  To avoid compilation
   errors, only prototype getopt for the GNU C library.  */
 extern int getopt (int argc, char *const *argv, const char *shortopts);
 #else /* not __GNU_LIBRARY__ */
 extern int getopt ();
 #endif /* __GNU_LIBRARY__ */
 extern int getopt_long (int argc, char *const *argv, const char *shortopts,
 		        const struct option *longopts, int *longind);
 extern int getopt_long_only (int argc, char *const *argv,
 			     const char *shortopts,
 		             const struct option *longopts, int *longind);
-/* Internal only.  Users should not call this directly.  */
+/* XXX: set optreset to 1 rather than these two */
-extern int _getopt_internal (int argc, char *const *argv,
+static int nonopt_start = -1; /* first non option argument (for permute) */
-			     const char *shortopts,
+static int nonopt_end = -1; /* first option after non options (for permute) */
 		             const struct option *longopts, int *longind,
 			     int long_only);
 #else /* not __STDC__ */
 extern int getopt ();
 extern int getopt_long ();
 extern int getopt_long_only ();
-extern int _getopt_internal ();
+/* Error messages */
-#endif /* __STDC__ */
+static const char recargchar[] = "option requires an argument -- %c";
 static const char recargstring[] = "option requires an argument -- %s";
 static const char ambig[] = "ambiguous option -- %.*s";
 static const char noarg[] = "option doesn't take an argument -- %.*s";
 static const char illoptchar[] = "unknown option -- %c";
 static const char illoptstring[] = "unknown option -- %s";
-#ifdef	__cplusplus
+static void _vwarnx(const char* fmt, va_list ap);
 static void warnx(const char* fmt, ...);
 /*
 * Compute the greatest common divisor of a and b.
 */
 static int gcd(int a, int b);
 /*
 * Exchange the block from nonopt_start to nonopt_end with the block
 * from nonopt_end to opt_end (keeping the same order of arguments
 * in each block).
 */
 static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv);
 #ifdef REPLACE_GETOPT
 /*
 * getopt --
 *	Parse argc/argv argument vector.
 *
 * [eventually this will replace the BSD getopt]
 */
 int getopt(int nargc, char* const* nargv, const char* options);
 #endif /* REPLACE_GETOPT */
 //extern int getopt(int nargc, char * const *nargv, const char *options);
 #ifdef _BSD_SOURCE
 /*
 * BSD adds the non-standard `optreset' feature, for reinitialisation
 * of `getopt' parsing.  We support this feature, for applications which
 * proclaim their BSD heritage, before including this header; however,
 * to maintain portability, developers are advised to avoid it.
 */
 #define optreset __mingw_optreset
 extern int optreset;
 #endif
 #ifdef __cplusplus
 }
 #endif
 /*
 * POSIX requires the `getopt' API to be specified in `unistd.h';
 * thus, `unistd.h' includes this header.  However, we do not want
 * to expose the `getopt_long' or `getopt_long_only' APIs, when
 * included in this manner.  Thus, close the standard __GETOPT_H__
 * declarations block, and open an additional __GETOPT_LONG_H__
 * specific block, only when *not* __UNISTD_H_SOURCED__, in which
 * to declare the extended API.
 */
 #endif /* !defined(__GETOPT_H__) */
 #if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
 #define __GETOPT_LONG_H__
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*
 * parse_long_options --
 *	Parse long options in argc/argv argument vector.
 * Returns -1 if short_too is set and the option does not match long_options.
 */
 static int parse_long_options(char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too);
 /*
 * getopt_internal --
 *	Parse argc/argv argument vector.  Called by user level routines.
 */
 static int getopt_internal(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags);
 /*
 * getopt_long --
 *	Parse argc/argv argument vector.
 */
 int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
 /*
 * getopt_long_only --
 *	Parse argc/argv argument vector.
 */
 int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
 /*
 * Previous MinGW implementation had...
 */
 #ifndef HAVE_DECL_GETOPT
 /*
 * ...for the long form API only; keep this for compatibility.
 */
 #define HAVE_DECL_GETOPT 1
 #endif
 #ifdef __cplusplus
 }
 #endif
-#endif /* _GETOPT_H */
+#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
 #endif
--- a/src/gettimeofday.c
+++ b/src/gettimeofday.c
@ -1,49 +1,43 @@
 #ifdef _WIN32
 #include "gettimeofday.h"
-int gettimeofday(struct timeval *tv, struct timezone *tz)
+LIB_API int gettimeofday(struct timeval* tp, struct timezone* tzp)
 {
-  FILETIME ft;
+  static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL);
-  unsigned __int64 tmpres = 0;
+  SYSTEMTIME system_time;
-  static int tzflag;
+  FILETIME file_time;
- 
+  uint64_t time;
-  if (NULL != tv)
+
  {
    GetSystemTimeAsFileTime(&ft);
    tmpres |= ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres |= ft.dwLowDateTime;
  GetSystemTime(&system_time);
  SystemTimeToFileTime(&system_time, &file_time);
  time = ((uint64_t)file_time.dwLowDateTime);
  time += ((uint64_t)file_time.dwHighDateTime) << 32;
    /*converting file time to unix epoch*/
-    tmpres -= DELTA_EPOCH_IN_MICROSECS; 
+  tp->tv_sec = (long)((time - EPOCH) / 10000000L);
-    tmpres /= 10;  /*convert into microseconds*/
+  tp->tv_usec = (long)(system_time.wMilliseconds * 1000);
    tv->tv_sec = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }
  if (NULL != tz)
  {
    if (!tzflag)
    {
      _tzset();
      tzflag++;
    }
    tz->tz_minuteswest = _timezone / 60;
    tz->tz_dsttime = _daylight;
  }
  return 0;
  }
 LIB_API int clock_gettime(int dummy, struct timespec* ct)
  {
  LARGE_INTEGER count;
  if (g_first_time) {
    g_first_time = 0;
    if (0 == QueryPerformanceFrequency(&g_counts_per_sec)) {
      g_counts_per_sec.QuadPart = 0;
    }
  }
  if ((NULL == ct) || (g_counts_per_sec.QuadPart <= 0) || (0 == QueryPerformanceCounter(&count))) {
    return -1;
 }
-/* never worry about timersub type activies again -- from GLIBC and upcased. */
+  ct->tv_sec = count.QuadPart / g_counts_per_sec.QuadPart;
-int timersub(struct timeval *a, struct timeval *b, struct timeval *result)
+  ct->tv_nsec = ((count.QuadPart % g_counts_per_sec.QuadPart) * BILLION) / g_counts_per_sec.QuadPart;
 {                                                                
         (result)->tv_sec = (a)->tv_sec - (b)->tv_sec;                        
         (result)->tv_usec = (a)->tv_usec - (b)->tv_usec;                     
         if ((result)->tv_usec < 0) {                                         
           --(result)->tv_sec;                                                
           (result)->tv_usec += 1000000;                                      
         }                                                                         
    return 0;
-}
+}
 #endif
--- a/src/gettimeofday.h
+++ b/src/gettimeofday.h
@ -1,20 +1,39 @@
-#pragma once
+#ifdef _WIN32
-
+#define WIN32_LEAN_AND_MEAN
 #include <Windows.h>
 #include <Winsock2.h>
 #include <stdint.h>
 #include <time.h>
-#include <windows.h> //I've ommited this line.
+#include "darknet.h"
-#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
+
-  #define DELTA_EPOCH_IN_MICROSECS  11644473600000000Ui64
+#define CLOCK_REALTIME (1)
-#else
+#define BILLION (1E9)
-  #define DELTA_EPOCH_IN_MICROSECS  11644473600000000ULL
+
 /*
  * timersub(a, b, result): store the difference *a - *b in *result.
  * All three arguments are pointers to structs with tv_sec and tv_usec members.
  * When the microsecond difference is negative, one second is borrowed
  * (tv_sec is decremented and 1000000 is added to tv_usec).
  * Each argument is expanded more than once, so pass expressions without side
  * effects. The definition is skipped if a timersub macro already exists.
  */
 #ifndef timersub
 #define timersub(a, b, result)                       \
  do {                                               \
    (result)->tv_sec = (a)->tv_sec - (b)->tv_sec;    \
    (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
    if ((result)->tv_usec < 0) {                     \
      --(result)->tv_sec;                            \
      (result)->tv_usec += 1000000;                  \
    }                                                \
  } while (0)
 #endif // timersub
 #ifdef __cplusplus
 extern "C" {
 #endif
-struct timezone 
+static unsigned char g_first_time = 1;
-{
+static LARGE_INTEGER g_counts_per_sec;
-  int  tz_minuteswest; /* minutes W of Greenwich */
+
-  int  tz_dsttime;     /* type of dst correction */
+LIB_API int gettimeofday(struct timeval*, struct timezone*);
-};
+LIB_API int clock_gettime(int, struct timespec*);
- 
+
-int gettimeofday(struct timeval *tv, struct timezone *tz);
+#ifdef __cplusplus
 }
 #endif
 #endif
 /* never worry about timersub type activities again -- from GLIBC and upcased. */
 int timersub(struct timeval *a, struct timeval *b, struct timeval *result);
--- a/src/go.c
+++ b/src/go.c
@ -5,13 +5,12 @@
 #include "blas.h"
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 int inverted = 1;
 int noi = 1;
-//static const int nind = 5;
+static const unsigned int n_ind = 5;
 #define nind 5
 typedef struct {
    char **data;
@ -22,7 +21,7 @@ char *fgetgo(FILE *fp)
 {
    if(feof(fp)) return 0;
    size_t size = 94;
-    char *line = malloc(size*sizeof(char));
+    char* line = (char*)malloc(size * sizeof(char));
    if(size != fread(line, sizeof(char), size, fp)){
        free(line);
        return 0;
@ -35,21 +34,21 @@ moves load_go_moves(char *filename)
 {
    moves m;
    m.n = 128;
-    m.data = calloc(128, sizeof(char*));
+    m.data = (char**)calloc(128, sizeof(char*));
    FILE *fp = fopen(filename, "rb");
    int count = 0;
    char *line = 0;
    while((line = fgetgo(fp))){
        if(count >= m.n){
            m.n *= 2;
-            m.data = realloc(m.data, m.n*sizeof(char*));
+            m.data = (char**)realloc(m.data, m.n * sizeof(char*));
        }
        m.data[count] = line;
        ++count;
    }
    printf("%d\n", count);
    m.n = count;
-    m.data = realloc(m.data, count*sizeof(char*));
+    m.data = (char**)realloc(m.data, count * sizeof(char*));
    return m;
 }
@ -127,12 +126,12 @@ void train_go(char *cfgfile, char *weightfile)
    }
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
    char buff[256];
-    float *board = calloc(19*19*net.batch, sizeof(float));
+    float* board = (float*)calloc(19 * 19 * net.batch, sizeof(float));
-    float *move = calloc(19*19*net.batch, sizeof(float));
+    float* move = (float*)calloc(19 * 19 * net.batch, sizeof(float));
-    moves m = load_go_moves("/home/pjreddie/backup/go.train");
+    moves m = load_go_moves("backup/go.train");
    //moves m = load_go_moves("games.txt");
    int N = m.n;
@ -187,7 +186,7 @@ void propagate_liberty(float *board, int *lib, int *visited, int row, int col, i
 int *calculate_liberties(float *board)
 {
-    int *lib = calloc(19*19, sizeof(int));
+    int* lib = (int*)calloc(19 * 19, sizeof(int));
    int visited[361];
    int i, j;
    for(j = 0; j < 19; ++j){
@ -222,7 +221,7 @@ void print_board(float *board, int swap, int *indexes)
            int index = j*19 + i;
            if(indexes){
                int found = 0;
-                for(n = 0; n < nind; ++n){
+                for (n = 0; n < n_ind; ++n) {
                    if(index == indexes[n]){
                        found = 1;
                        /*
@ -365,9 +364,9 @@ int generate_move(network net, int player, float *board, int multi, float thresh
        }
    }
-    int indexes[nind];
+    int indexes[n_ind];
-    top_k(move, 19*19, nind, indexes);
+    top_k(move, 19*19, n_ind, indexes);
-    if(thresh > move[indexes[0]]) thresh = move[indexes[nind-1]];
+    if(thresh > move[indexes[0]]) thresh = move[indexes[n_ind-1]];
    for(i = 0; i < 19; ++i){
        for(j = 0; j < 19; ++j){
@ -382,12 +381,12 @@ int generate_move(network net, int player, float *board, int multi, float thresh
    int index = sample_array(move, 19*19);
    if(print){
-        top_k(move, 19*19, nind, indexes);
+        top_k(move, 19*19, n_ind, indexes);
-        for(i = 0; i < nind; ++i){
+        for(i = 0; i < n_ind; ++i){
            if (!move[indexes[i]]) indexes[i] = -1;
        }
        print_board(board, player, indexes);
-        for(i = 0; i < nind; ++i){
+        for(i = 0; i < n_ind; ++i){
            fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]);
        }
    }
@ -411,9 +410,9 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
    set_batch_network(&net, 1);
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    float *board = calloc(19*19, sizeof(float));
+    float* board = (float*)calloc(19 * 19, sizeof(float));
-    float *move = calloc(19*19, sizeof(float));
+    float* move = (float*)calloc(19 * 19, sizeof(float));
-    moves m = load_go_moves("/home/pjreddie/backup/go.test");
+    moves m = load_go_moves("backup/go.test");
    int N = m.n;
    int i;
@ -439,9 +438,9 @@ void engine_go(char *filename, char *weightfile, int multi)
    }
    srand(time(0));
    set_batch_network(&net, 1);
-    float *board = calloc(19*19, sizeof(float));
+    float* board = (float*)calloc(19 * 19, sizeof(float));
-    char *one = calloc(91, sizeof(char));
+    char* one = (char*)calloc(91, sizeof(char));
-    char *two = calloc(91, sizeof(char));
+    char* two = (char*)calloc(91, sizeof(char));
    int passed = 0;
    while(1){
        char buff[256];
@ -612,8 +611,8 @@ void test_go(char *cfg, char *weights, int multi)
    }
    srand(time(0));
    set_batch_network(&net, 1);
-    float *board = calloc(19*19, sizeof(float));
+    float* board = (float*)calloc(19 * 19, sizeof(float));
-    float *move = calloc(19*19, sizeof(float));
+    float* move = (float*)calloc(19 * 19, sizeof(float));
    int color = 1;
    while(1){
        float *output = network_predict(net, board);
@ -642,11 +641,11 @@ void test_go(char *cfg, char *weights, int multi)
            if(board[i]) move[i] = 0;
        }
-        int indexes[nind];
+        int indexes[n_ind];
        int row, col;
-        top_k(move, 19*19, nind, indexes);
+        top_k(move, 19 * 19, n_ind, indexes);
        print_board(board, color, indexes);
-        for(i = 0; i < nind; ++i){
+        for (i = 0; i < n_ind; ++i) {
            int index = indexes[i];
            row = index / 19;
            col = index % 19;
@ -664,7 +663,7 @@ void test_go(char *cfg, char *weights, int multi)
        int cnum = sscanf(line, "%c", &c);
        if (strlen(line) == 0 || dnum) {
            --picked;
-            if (picked < nind){
+            if (picked < n_ind){
                int index = indexes[picked];
                row = index / 19;
                col = index % 19;
@ -764,9 +763,9 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
    int count = 0;
    set_batch_network(&net, 1);
    set_batch_network(&net2, 1);
-    float *board = calloc(19*19, sizeof(float));
+    float* board = (float*)calloc(19 * 19, sizeof(float));
-    char *one = calloc(91, sizeof(char));
+    char* one = (char*)calloc(91, sizeof(char));
-    char *two = calloc(91, sizeof(char));
+    char* two = (char*)calloc(91, sizeof(char));
    int done = 0;
    int player = 1;
    int p1 = 0;
--- a/src/gru_layer.c
+++ b/src/gru_layer.c
@ -30,42 +30,42 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
 {
    fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs);
    batch = batch / steps;
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
    l.batch = batch;
    l.type = GRU;
    l.steps = steps;
    l.inputs = inputs;
-    l.input_z_layer = malloc(sizeof(layer));
+    l.input_z_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.input_z_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
    l.input_z_layer->batch = batch;
-    l.state_z_layer = malloc(sizeof(layer));
+    l.state_z_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.state_z_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
    l.state_z_layer->batch = batch;
-    l.input_r_layer = malloc(sizeof(layer));
+    l.input_r_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.input_r_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
    l.input_r_layer->batch = batch;
-    l.state_r_layer = malloc(sizeof(layer));
+    l.state_r_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.state_r_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
    l.state_r_layer->batch = batch;
-    l.input_h_layer = malloc(sizeof(layer));
+    l.input_h_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.input_h_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
    l.input_h_layer->batch = batch;
-    l.state_h_layer = malloc(sizeof(layer));
+    l.state_h_layer = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.state_h_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
    l.state_h_layer->batch = batch;
@ -74,16 +74,16 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
    l.outputs = outputs;
-    l.output = calloc(outputs*batch*steps, sizeof(float));
+    l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
-    l.delta = calloc(outputs*batch*steps, sizeof(float));
+    l.delta = (float*)calloc(outputs * batch * steps, sizeof(float));
-    l.state = calloc(outputs*batch, sizeof(float));
+    l.state = (float*)calloc(outputs * batch, sizeof(float));
-    l.prev_state = calloc(outputs*batch, sizeof(float));
+    l.prev_state = (float*)calloc(outputs * batch, sizeof(float));
-    l.forgot_state = calloc(outputs*batch, sizeof(float));
+    l.forgot_state = (float*)calloc(outputs * batch, sizeof(float));
-    l.forgot_delta = calloc(outputs*batch, sizeof(float));
+    l.forgot_delta = (float*)calloc(outputs * batch, sizeof(float));
-    l.r_cpu = calloc(outputs*batch, sizeof(float));
+    l.r_cpu = (float*)calloc(outputs * batch, sizeof(float));
-    l.z_cpu = calloc(outputs*batch, sizeof(float));
+    l.z_cpu = (float*)calloc(outputs * batch, sizeof(float));
-    l.h_cpu = calloc(outputs*batch, sizeof(float));
+    l.h_cpu = (float*)calloc(outputs * batch, sizeof(float));
    l.forward = forward_gru_layer;
    l.backward = backward_gru_layer;
--- a/src/gru_layer.h
+++ b/src/gru_layer.h
@ -6,6 +6,9 @@
 #include "layer.h"
 #include "network.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
 void forward_gru_layer(layer l, network_state state);
@ -20,5 +23,8 @@ void push_gru_layer(layer l);
 void pull_gru_layer(layer l);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/http_stream.cpp
+++ b/src/http_stream.cpp
@ -1,3 +1,4 @@
 #include "image.h"
 #include "http_stream.h"
 #ifdef OPENCV
@ -17,9 +18,10 @@ using std::endl;
 // socket related abstractions:
 //
 #ifdef _WIN32
 #ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "ws2_32.lib")
-#include <winsock.h>
+#endif
-#include <windows.h>
+#include "gettimeofday.h"
 #include <time.h>
 #define PORT        unsigned long
 #define ADDRPOINTER   int*
@ -44,7 +46,7 @@ static int close_socket(SOCKET s) {
    return result;
 }
 #else   // nix
-#include <unistd.h>
+#include "darkunistd.h"
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@ -85,16 +87,15 @@ static int close_socket(SOCKET s) {
 #endif // _WIN32
-#include "opencv2/opencv.hpp"
+#include <opencv2/opencv.hpp>
-#include "opencv2/highgui/highgui.hpp"
+#include <opencv2/highgui/highgui.hpp>
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/imgproc/imgproc_c.h>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio.hpp"
+#include <opencv2/videoio/videoio.hpp>
 #endif
 using namespace cv;
 #include "image.h"
 class MJPG_sender
@ -195,7 +196,8 @@ public:
        std::vector<int> params;
        params.push_back(IMWRITE_JPEG_QUALITY);
        params.push_back(quality);
-        cv::imencode(".jpg", frame, outbuf, params);
+        //cv::imencode(".jpg", frame, outbuf, params);  //REMOVED FOR COMPATIBILITY
        std::cerr << "cv::imencode call disabled!" << std::endl;
        size_t outlen = outbuf.size();
 #ifdef _WIN32
@ -227,17 +229,17 @@ public:
                }
                maxfd = (maxfd>client ? maxfd : client);
                FD_SET(client, &master);
-                _write(client, "HTTP/1.0 200 OK\r\n", 0);
+                _write(client, "HTTP/1.0 200 OK\n", 0);
                _write(client,
-                    "Server: Mozarella/2.2\r\n"
+                    "Server: Mozarella/2.2\n"
-                    "Accept-Range: bytes\r\n"
+                    "Accept-Range: bytes\n"
-                    "Connection: close\r\n"
+                    "Connection: close\n"
-                    "Max-Age: 0\r\n"
+                    "Max-Age: 0\n"
-                    "Expires: 0\r\n"
+                    "Expires: 0\n"
-                    "Cache-Control: no-cache, private\r\n"
+                    "Cache-Control: no-cache, private\n"
-                    "Pragma: no-cache\r\n"
+                    "Pragma: no-cache\n"
-                    "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n"
+                    "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\n"
-                    "\r\n", 0);
+                    "\n", 0);
                cerr << "MJPG_sender: new client " << client << endl;
            }
            else // existing client, just stream pix
@ -249,7 +251,7 @@ public:
                }
                char head[400];
-                sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen);
+                sprintf(head, "--mjpegstream\nContent-Type: image/jpeg\nContent-Length: %zu\n\n", outlen);
                _write(s, head, 0);
                int n = _write(s, (char*)(&outbuf[0]), outlen);
                //cerr << "known client " << s << " " << n << endl;
@ -406,18 +408,18 @@ public:
                }
                maxfd = (maxfd>client ? maxfd : client);
                FD_SET(client, &master);
-                _write(client, "HTTP/1.0 200 OK\r\n", 0);
+                _write(client, "HTTP/1.0 200 OK\n", 0);
                _write(client,
-                    "Server: Mozarella/2.2\r\n"
+                    "Server: Mozarella/2.2\n"
-                    "Accept-Range: bytes\r\n"
+                    "Accept-Range: bytes\n"
-                    "Connection: close\r\n"
+                    "Connection: close\n"
-                    "Max-Age: 0\r\n"
+                    "Max-Age: 0\n"
-                    "Expires: 0\r\n"
+                    "Expires: 0\n"
-                    "Cache-Control: no-cache, private\r\n"
+                    "Cache-Control: no-cache, private\n"
-                    "Pragma: no-cache\r\n"
+                    "Pragma: no-cache\n"
-                    "Content-Type: application/json\r\n"
+                    "Content-Type: application/json\n"
                    //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n"
-                    "\r\n", 0);
+                    "\n", 0);
                _write(client, "[\n", 0);   // open JSON array
                int n = _write(client, outputbuf, outlen);
                cerr << "JSON_sender: new client " << client << endl;
@ -473,7 +475,7 @@ void send_json(detection *dets, int nboxes, int classes, char **names, long long
 // ----------------------------------------
-CvCapture* get_capture_video_stream(char *path) {
+CvCapture* get_capture_video_stream(const char *path) {
    CvCapture* cap = NULL;
    try {
        cap = (CvCapture*)new cv::VideoCapture(path);
@ -511,7 +513,7 @@ IplImage* get_webcam_frame(CvCapture *cap) {
            src = cvCloneImage(&tmp);
        }
        else {
-            std::cout << " Video-stream stoped! \n";
+            std::cout << " Video-stream stopped! \n";
        }
    }
    catch (...) {
@ -536,9 +538,6 @@ int get_stream_fps_cpp(CvCapture *cap) {
    return fps;
 }
 // ----------------------------------------
 extern "C" {
    image ipl_to_image(IplImage* src);    // image.c
 }
 image image_data_augmentation(IplImage* ipl, int w, int h,
    int pleft, int ptop, int swidth, int sheight, int flip,
@ -701,4 +700,4 @@ void stop_timer_and_show() {
 }
 void stop_timer_and_show_name(char *name) { stop_timer_and_show(); }
 void total_time() {}
-#endif // C++11
+#endif // C++11
--- a/src/http_stream.h
+++ b/src/http_stream.h
@ -1,11 +1,14 @@
 #pragma once
 #ifndef HTTP_STREAM_H
 #define HTTP_STREAM_H
 #include "darknet.h"
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/core/version.hpp>
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #include <opencv2/imgproc/imgproc_c.h>
 #ifndef CV_VERSION_EPOCH
 #include <opencv2/videoio/videoio_c.h>
 #endif
 #endif
 #ifdef __cplusplus
@ -18,7 +21,7 @@ extern "C" {
 void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout);
 void send_mjpeg(IplImage* ipl, int port, int timeout, int quality);
 CvCapture* get_capture_webcam(int index);
-CvCapture* get_capture_video_stream(char *path);
+CvCapture* get_capture_video_stream(const char *path);
 IplImage* get_webcam_frame(CvCapture *cap);
 int get_stream_fps_cpp(CvCapture *cap);
--- a/src/im2col.h
+++ b/src/im2col.h
@ -5,9 +5,14 @@
 #include <stdint.h>
 #include "darknet.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 void im2col_cpu(float* data_im,
        int channels, int height, int width,
        int ksize, int stride, int pad, float* data_col);
 float im2col_get_pixel(float* im, int height, int width, int channels,
    int row, int col, int channel, int pad);
 #ifdef GPU
@ -63,5 +68,8 @@ void convolve_bin_cpu(float *input, float *weights, float *output, int in_w, int
 void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/im2col_kernels.cu
+++ b/src/im2col_kernels.cu
@ -1,19 +1,15 @@
-#include "cuda_runtime.h"
+#include <cuda_runtime.h>
-#include "curand.h"
+#include <curand.h>
-#include "cublas_v2.h"
+#include <cublas_v2.h>
 #include <stdint.h>
 extern "C" {
 #include "im2col.h"
 #include "cuda.h"
 }
 #include <stdio.h>
 #include <assert.h>
-#include <cuda.h>
+//#include <cuda.h>
 #define FULL_MASK 0xffffffff
 #define WARP_SIZE 32
 template<typename T1, typename T2>
 __device__ inline T1 __shfl_custom(T1 val, T2 lane) {
@ -154,11 +150,6 @@ __global__ void im2col_align_gpu_kernel(const int n, const float* data_im,
 {
    //__shared__ float tmp_s[1];
 //#define SHRED_VALS ((BLOCK / 169) * )
    //__shared__ float dst_s[1024];
    //__shared__ float dst_s[1024];
    //__shared__ uint32_t bit_s[32];
    //__shared__ uint8_t bit_s[128];
    int index = blockIdx.x*blockDim.x + threadIdx.x;
    for (; index < n; index += blockDim.x*gridDim.x) {
@ -604,8 +595,7 @@ __device__ void transpose32_optimized(uint32_t A[32]) {
    }
 }
-#define BLOCK_TRANSPOSE32 256
+extern "C" {
 __device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n)
 {
    //unsigned A_tmp[32];
@ -626,7 +616,7 @@ __device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B
    #pragma unroll 32
    for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i];
 }
-
+}
 // transpose 32x32 bit
 __global__ void transpose_bin_gpu_kernel_32(uint32_t *A, uint32_t *B, const int n, const int m,
--- a/src/image.c
+++ b/src/image.c
@ -1,4 +1,3 @@
 #include "darknet.h"
 #include "image.h"
 #include "utils.h"
 #include "blas.h"
@ -6,25 +5,31 @@
 #include <stdio.h>
 #include <math.h>
 #ifndef STB_IMAGE_IMPLEMENTATION
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 #endif
 #ifndef STB_IMAGE_WRITE_IMPLEMENTATION
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include "stb_image_write.h"
 #endif
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/imgproc/imgproc_c.h>
-#include "opencv2/core/types_c.h"
+#include <opencv2/core/types_c.h>
-#include "opencv2/core/version.hpp"
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
-#include "opencv2/imgcodecs/imgcodecs_c.h"
+#include <opencv2/imgcodecs/imgcodecs_c.h>
 #include "http_stream.h"
 #endif
 #include "http_stream.h"
 #ifndef CV_RGB
 #define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
 #endif
 #endif
 extern int check_mistakes;
 int windows = 0;
@ -255,9 +260,9 @@ image **load_alphabet()
 {
    int i, j;
    const int nsize = 8;
-    image **alphabets = calloc(nsize, sizeof(image));
+    image** alphabets = (image**)calloc(nsize, sizeof(image*));
    for(j = 0; j < nsize; ++j){
-        alphabets[j] = calloc(128, sizeof(image));
+        alphabets[j] = (image*)calloc(128, sizeof(image));
        for(i = 32; i < 127; ++i){
            char buff[256];
            sprintf(buff, "data/labels/%d_%d.png", i, j);
@ -273,7 +278,7 @@ image **load_alphabet()
 detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names)
 {
    int selected_num = 0;
-    detection_with_class* result_arr = calloc(dets_num, sizeof(detection_with_class));
+    detection_with_class* result_arr = (detection_with_class*)calloc(dets_num, sizeof(detection_with_class));
    int i;
    for (i = 0; i < dets_num; ++i) {
        int best_class = -1;
@ -505,7 +510,7 @@ void save_cv_png(IplImage *img, const char *name)
    IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
    cvCvtColor(img, img_rgb, CV_RGB2BGR);
    stbi_write_png(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 0);
-    cvRelease(&img_rgb);
+    cvRelease((void**)&img_rgb);
 }
 void save_cv_jpg(IplImage *img, const char *name)
@ -513,7 +518,7 @@ void save_cv_jpg(IplImage *img, const char *name)
    IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
    cvCvtColor(img, img_rgb, CV_RGB2BGR);
    stbi_write_jpg(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 80);
-    cvRelease(&img_rgb);
+    cvRelease((void**)&img_rgb);
 }
 void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output)
@ -952,8 +957,8 @@ void normalize_image(image p)
 void normalize_image2(image p)
 {
-    float *min = calloc(p.c, sizeof(float));
+    float* min = (float*)calloc(p.c, sizeof(float));
-    float *max = calloc(p.c, sizeof(float));
+    float* max = (float*)calloc(p.c, sizeof(float));
    int i,j;
    for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w];
@ -982,7 +987,7 @@ void normalize_image2(image p)
 image copy_image(image p)
 {
    image copy = p;
-    copy.data = calloc(p.h*p.w*p.c, sizeof(float));
+    copy.data = (float*)calloc(p.h * p.w * p.c, sizeof(float));
    memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float));
    return copy;
 }
@ -1252,7 +1257,7 @@ void save_image_png(image im, const char *name)
    char buff[256];
    //sprintf(buff, "%s (%d)", name, windows);
    sprintf(buff, "%s.png", name);
-    unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
+    unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
    int i,k;
    for(k = 0; k < im.c; ++k){
        for(i = 0; i < im.w*im.h; ++i){
@ -1273,7 +1278,7 @@ void save_image_options(image im, const char *name, IMTYPE f, int quality)
    else if (f == TGA) sprintf(buff, "%s.tga", name);
    else if (f == JPG) sprintf(buff, "%s.jpg", name);
    else               sprintf(buff, "%s.png", name);
-    unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
+    unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
    int i, k;
    for (k = 0; k < im.c; ++k) {
        for (i = 0; i < im.w*im.h; ++i) {
@ -1331,14 +1336,14 @@ image make_empty_image(int w, int h, int c)
 image make_image(int w, int h, int c)
 {
    image out = make_empty_image(w,h,c);
-    out.data = calloc(h*w*c, sizeof(float));
+    out.data = (float*)calloc(h * w * c, sizeof(float));
    return out;
 }
 image make_random_image(int w, int h, int c)
 {
    image out = make_empty_image(w,h,c);
-    out.data = calloc(h*w*c, sizeof(float));
+    out.data = (float*)calloc(h * w * c, sizeof(float));
    int i;
    for(i = 0; i < w*h*c; ++i){
        out.data[i] = (rand_normal() * .25) + .5;
@ -1585,7 +1590,7 @@ image random_augment_image(image im, float angle, float aspect, int low, int hig
    int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
    float scale = (float)r / min;
-    float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
+    float rad = rand_uniform(-angle, angle) * 2.0 * M_PI / 360.;
    float dx = (im.w*scale/aspect - size) / 2.;
    float dy = (im.h*scale - size) / 2.;
--- a/src/image.h
+++ b/src/image.h
@ -7,8 +7,20 @@
 #include <float.h>
 #include <string.h>
 #include <math.h>
 #ifdef OPENCV
 #include <opencv2/highgui/highgui_c.h>
 #include <opencv2/imgproc/imgproc_c.h>
 #include <opencv2/core/types_c.h>
 #include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
 #include <opencv2/videoio/videoio_c.h>
 #include <opencv2/imgcodecs/imgcodecs_c.h>
 #endif
 #endif
 #include "box.h"
-
+#ifdef __cplusplus
 extern "C" {
 #endif
 /*
 typedef struct {
    int w;
@ -80,6 +92,11 @@ image load_image(char *filename, int w, int h, int c);
 //LIB_API image load_image_color(char *filename, int w, int h);
 image **load_alphabet();
 #ifdef OPENCV
 LIB_API image get_image_from_stream(CvCapture* cap);
 LIB_API image get_image_from_stream_cpp(CvCapture* cap);
 LIB_API image ipl_to_image(IplImage* src);
 #endif
 //float get_pixel(image m, int x, int y, int c);
 //float get_pixel_extend(image m, int x, int y, int c);
 //void set_pixel(image m, int x, int y, int c, float val);
@ -90,5 +107,8 @@ image get_image_layer(image m, int l);
 //LIB_API void free_image(image m);
 void test_resize(char *filename);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/layer.c
+++ b/src/layer.c
@ -98,7 +98,7 @@ void free_layer(layer l)
 	if (l.x_gpu)                   cuda_free(l.x_gpu);
 	if (l.x_norm_gpu)              cuda_free(l.x_norm_gpu);
-    if (l.align_bit_weights_gpu)   cuda_free(l.align_bit_weights_gpu);
+    if (l.align_bit_weights_gpu)   cuda_free((float *)l.align_bit_weights_gpu);
    if (l.mean_arr_gpu)            cuda_free(l.mean_arr_gpu);
    if (l.align_workspace_gpu)     cuda_free(l.align_workspace_gpu);
    if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu);
--- a/src/layer.h
+++ b/src/layer.h
@ -1,10 +1,12 @@
 #ifndef BASE_LAYER_H
 #define BASE_LAYER_H
 #include "darknet.h"
 #include "activations.h"
 #include "stddef.h"
 #include "tree.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 //struct network_state;
@ -330,4 +332,7 @@ struct layer{
 */
 //void free_layer(layer);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/list.c
+++ b/src/list.c
@ -5,7 +5,7 @@
 list *make_list()
 {
-    list *l = malloc(sizeof(list));
+    list* l = (list*)malloc(sizeof(list));
    l->size = 0;
    l->front = 0;
    l->back = 0;
@ -40,18 +40,18 @@ void *list_pop(list *l){
 void list_insert(list *l, void *val)
 {
-    node *new = malloc(sizeof(node));
+    node* newnode = (node*)malloc(sizeof(node));
-    new->val = val;
+    newnode->val = val;
-    new->next = 0;
+    newnode->next = 0;
    if(!l->back){
-        l->front = new;
+        l->front = newnode;
-        new->prev = 0;
+        newnode->prev = 0;
    }else{
-        l->back->next = new;
+        l->back->next = newnode;
-        new->prev = l->back;
+        newnode->prev = l->back;
    }
-    l->back = new;
+    l->back = newnode;
    ++l->size;
 }
@ -84,7 +84,7 @@ void free_list_contents_kvp(list *l)
 {
    node *n = l->front;
    while (n) {
-        kvp *p = n->val;
+        kvp* p = (kvp*)n->val;
        free(p->key);
        free(n->val);
        n = n->next;
@ -93,7 +93,7 @@ void free_list_contents_kvp(list *l)
 void **list_to_array(list *l)
 {
-    void **a = calloc(l->size, sizeof(void*));
+    void** a = (void**)calloc(l->size, sizeof(void*));
    int count = 0;
    node *n = l->front;
    while(n){
--- a/src/list.h
+++ b/src/list.h
@ -13,6 +13,9 @@ typedef struct list{
    node *back;
 } list;
 #ifdef __cplusplus
 extern "C" {
 #endif
 list *make_list();
 int list_find(list *l, void *val);
@ -24,4 +27,7 @@ void free_list(list *l);
 void free_list_contents(list *l);
 void free_list_contents_kvp(list *l);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/local_layer.c
+++ b/src/local_layer.c
@ -26,7 +26,7 @@ int local_out_width(local_layer l)
 local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
 {
    int i;
-    local_layer l = {0};
+    local_layer l = { (LAYER_TYPE)0 };
    l.type = LOCAL;
    l.h = h;
@ -47,19 +47,19 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;
-    l.weights = calloc(c*n*size*size*locations, sizeof(float));
+    l.weights = (float*)calloc(c * n * size * size * locations, sizeof(float));
-    l.weight_updates = calloc(c*n*size*size*locations, sizeof(float));
+    l.weight_updates = (float*)calloc(c * n * size * size * locations, sizeof(float));
-    l.biases = calloc(l.outputs, sizeof(float));
+    l.biases = (float*)calloc(l.outputs, sizeof(float));
-    l.bias_updates = calloc(l.outputs, sizeof(float));
+    l.bias_updates = (float*)calloc(l.outputs, sizeof(float));
    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
    for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);
-    l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
+    l.col_image = (float*)calloc(out_h * out_w * size * size * c, sizeof(float));
-    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
+    l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
-    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));
+    l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
    l.forward = forward_local_layer;
    l.backward = backward_local_layer;
--- a/src/local_layer.h
+++ b/src/local_layer.h
@ -9,6 +9,9 @@
 typedef layer local_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 #ifdef GPU
 void forward_local_layer_gpu(local_layer layer, network_state state);
 void backward_local_layer_gpu(local_layer layer, network_state state);
@ -27,5 +30,8 @@ void update_local_layer(local_layer layer, int batch, float learning_rate, float
 void bias_output(float *output, float *biases, int batch, int n, int size);
 void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/lstm_layer.c
+++ b/src/lstm_layer.c
@ -30,7 +30,7 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
 {
    fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs);
    batch = batch / steps;
-    layer l = { 0 };
+    layer l = { (LAYER_TYPE)0 };
    l.batch = batch;
    l.type = LSTM;
    l.steps = steps;
@ -39,49 +39,49 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
    l.out_h = 1;
    l.out_c = outputs;
-    l.uf = malloc(sizeof(layer));
+    l.uf = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
    l.uf->batch = batch;
    if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
-    l.ui = malloc(sizeof(layer));
+    l.ui = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
    l.ui->batch = batch;
    if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
-    l.ug = malloc(sizeof(layer));
+    l.ug = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
    l.ug->batch = batch;
    if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
-    l.uo = malloc(sizeof(layer));
+    l.uo = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
    l.uo->batch = batch;
    if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
-    l.wf = malloc(sizeof(layer));
+    l.wf = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
    l.wf->batch = batch;
    if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
-    l.wi = malloc(sizeof(layer));
+    l.wi = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
    l.wi->batch = batch;
    if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
-    l.wg = malloc(sizeof(layer));
+    l.wg = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
    l.wg->batch = batch;
    if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
-    l.wo = malloc(sizeof(layer));
+    l.wo = (layer*)malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
    l.wo->batch = batch;
@ -90,27 +90,27 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
    l.batch_normalize = batch_normalize;
    l.outputs = outputs;
-    l.output = calloc(outputs*batch*steps, sizeof(float));
+    l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
-    l.state = calloc(outputs*batch, sizeof(float));
+    l.state = (float*)calloc(outputs * batch, sizeof(float));
    l.forward = forward_lstm_layer;
    l.update = update_lstm_layer;
-    l.prev_state_cpu =  calloc(batch*outputs, sizeof(float));
+    l.prev_state_cpu =  (float*)calloc(batch*outputs, sizeof(float));
-    l.prev_cell_cpu =   calloc(batch*outputs, sizeof(float));
+    l.prev_cell_cpu =   (float*)calloc(batch*outputs, sizeof(float));
-    l.cell_cpu =        calloc(batch*outputs*steps, sizeof(float));
+    l.cell_cpu =        (float*)calloc(batch*outputs*steps, sizeof(float));
-    l.f_cpu =           calloc(batch*outputs, sizeof(float));
+    l.f_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.i_cpu =           calloc(batch*outputs, sizeof(float));
+    l.i_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.g_cpu =           calloc(batch*outputs, sizeof(float));
+    l.g_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.o_cpu =           calloc(batch*outputs, sizeof(float));
+    l.o_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.c_cpu =           calloc(batch*outputs, sizeof(float));
+    l.c_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.h_cpu =           calloc(batch*outputs, sizeof(float));
+    l.h_cpu =           (float*)calloc(batch*outputs, sizeof(float));
-    l.temp_cpu =        calloc(batch*outputs, sizeof(float));
+    l.temp_cpu =        (float*)calloc(batch*outputs, sizeof(float));
-    l.temp2_cpu =       calloc(batch*outputs, sizeof(float));
+    l.temp2_cpu =       (float*)calloc(batch*outputs, sizeof(float));
-    l.temp3_cpu =       calloc(batch*outputs, sizeof(float));
+    l.temp3_cpu =       (float*)calloc(batch*outputs, sizeof(float));
-    l.dc_cpu =          calloc(batch*outputs, sizeof(float));
+    l.dc_cpu =          (float*)calloc(batch*outputs, sizeof(float));
-    l.dh_cpu =          calloc(batch*outputs, sizeof(float));
+    l.dh_cpu =          (float*)calloc(batch*outputs, sizeof(float));
 #ifdef GPU
    l.forward_gpu = forward_lstm_layer_gpu;
--- a/src/lstm_layer.h
+++ b/src/lstm_layer.h
@ -6,7 +6,10 @@
 #include "network.h"
 #define USET
-layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
+#ifdef __cplusplus
 extern "C" {
 #endif
 LIB_API layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
 void forward_lstm_layer(layer l, network_state state); 
 void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay);
@ -15,6 +18,9 @@ void update_lstm_layer(layer l, int batch, float learning_rate, float momentum,
 void forward_lstm_layer_gpu(layer l, network_state state);
 void backward_lstm_layer_gpu(layer l, network_state state);
 void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); 
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/matrix.c
+++ b/src/matrix.c
@ -15,7 +15,7 @@ void free_matrix(matrix m)
 float matrix_topk_accuracy(matrix truth, matrix guess, int k)
 {
-    int *indexes = calloc(k, sizeof(int));
+    int* indexes = (int*)calloc(k, sizeof(int));
    int n = truth.cols;
    int i,j;
    int correct = 0;
@ -48,15 +48,15 @@ matrix resize_matrix(matrix m, int size)
    int i;
    if (m.rows == size) return m;
    if (m.rows < size) {
-        m.vals = realloc(m.vals, size*sizeof(float*));
+        m.vals = (float**)realloc(m.vals, size * sizeof(float*));
        for (i = m.rows; i < size; ++i) {
-            m.vals[i] = calloc(m.cols, sizeof(float));
+            m.vals[i] = (float*)calloc(m.cols, sizeof(float));
        }
    } else if (m.rows > size) {
        for (i = size; i < m.rows; ++i) {
            free(m.vals[i]);
        }
-        m.vals = realloc(m.vals, size*sizeof(float*));
+        m.vals = (float**)realloc(m.vals, size * sizeof(float*));
    }
    m.rows = size;
    return m;
@ -79,9 +79,9 @@ matrix make_matrix(int rows, int cols)
    matrix m;
    m.rows = rows;
    m.cols = cols;
-    m.vals = calloc(m.rows, sizeof(float *));
+    m.vals = (float**)calloc(m.rows, sizeof(float*));
    for(i = 0; i < m.rows; ++i){
-        m.vals[i] = calloc(m.cols, sizeof(float));
+        m.vals[i] = (float*)calloc(m.cols, sizeof(float));
    }
    return m;
 }
@ -92,7 +92,7 @@ matrix hold_out_matrix(matrix *m, int n)
    matrix h;
    h.rows = n;
    h.cols = m->cols;
-    h.vals = calloc(h.rows, sizeof(float *));
+    h.vals = (float**)calloc(h.rows, sizeof(float*));
    for(i = 0; i < n; ++i){
        int index = rand()%m->rows;
        h.vals[i] = m->vals[index];
@ -103,7 +103,7 @@ matrix hold_out_matrix(matrix *m, int n)
 float *pop_column(matrix *m, int c)
 {
-    float *col = calloc(m->rows, sizeof(float));
+    float* col = (float*)calloc(m->rows, sizeof(float));
    int i, j;
    for(i = 0; i < m->rows; ++i){
        col[i] = m->vals[i][c];
@ -127,18 +127,18 @@ matrix csv_to_matrix(char *filename)
    int n = 0;
    int size = 1024;
-    m.vals = calloc(size, sizeof(float*));
+    m.vals = (float**)calloc(size, sizeof(float*));
    while((line = fgetl(fp))){
        if(m.cols == -1) m.cols = count_fields(line);
        if(n == size){
            size *= 2;
-            m.vals = realloc(m.vals, size*sizeof(float*));
+            m.vals = (float**)realloc(m.vals, size * sizeof(float*));
        }
        m.vals[n] = parse_fields(line, m.cols);
        free(line);
        ++n;
    }
-    m.vals = realloc(m.vals, n*sizeof(float*));
+    m.vals = (float**)realloc(m.vals, n * sizeof(float*));
    m.rows = n;
    return m;
 }
@ -225,7 +225,7 @@ void kmeans_maximization(matrix data, int *assignments, matrix centers)
    matrix old_centers = make_matrix(centers.rows, centers.cols);
    int i, j;
-    int *counts = calloc(centers.rows, sizeof(int));
+    int *counts = (int*)calloc(centers.rows, sizeof(int));
    for (i = 0; i < centers.rows; ++i) {
        for (j = 0; j < centers.cols; ++j) {
            old_centers.vals[i][j] = centers.vals[i][j];
@ -268,7 +268,7 @@ void random_centers(matrix data, matrix centers) {
 int *sample(int n)
 {
    int i;
-    int *s = calloc(n, sizeof(int));
+    int* s = (int*)calloc(n, sizeof(int));
    for (i = 0; i < n; ++i) s[i] = i;
    for (i = n - 1; i >= 0; --i) {
        int swap = s[i];
@ -301,7 +301,7 @@ void copy(float *x, float *y, int n)
 model do_kmeans(matrix data, int k)
 {
    matrix centers = make_matrix(k, data.cols);
-    int *assignments = calloc(data.rows, sizeof(int));
+    int* assignments = (int*)calloc(data.rows, sizeof(int));
    //smart_centers(data, centers);
    random_centers(data, centers);  // IoU = 67.31% after kmeans
--- a/src/matrix.h
+++ b/src/matrix.h
@ -12,6 +12,9 @@ typedef struct {
    matrix centers;
 } model;
 #ifdef __cplusplus
 extern "C" {
 #endif
 model do_kmeans(matrix data, int k);
 matrix make_matrix(int rows, int cols);
@ -28,4 +31,7 @@ matrix resize_matrix(matrix m, int size);
 float *pop_column(matrix *m, int c);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@ -47,7 +47,7 @@ void cudnn_maxpool_setup(layer *l)
 maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
 {
-    maxpool_layer l = {0};
+    maxpool_layer l = { (LAYER_TYPE)0 };
    l.type = MAXPOOL;
    l.batch = batch;
    l.h = h;
@ -62,9 +62,9 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
    l.size = size;
    l.stride = stride;
    int output_size = l.out_h * l.out_w * l.out_c * batch;
-    l.indexes = calloc(output_size, sizeof(int));
+    l.indexes = (int*)calloc(output_size, sizeof(int));
-    l.output =  calloc(output_size, sizeof(float));
+    l.output = (float*)calloc(output_size, sizeof(float));
-    l.delta =   calloc(output_size, sizeof(float));
+    l.delta = (float*)calloc(output_size, sizeof(float));
    l.forward = forward_maxpool_layer;
    l.backward = backward_maxpool_layer;
    #ifdef GPU
@ -93,9 +93,9 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
    l->outputs = l->out_w * l->out_h * l->c;
    int output_size = l->outputs * l->batch;
-    l->indexes = realloc(l->indexes, output_size * sizeof(int));
+    l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int));
-    l->output = realloc(l->output, output_size * sizeof(float));
+    l->output = (float*)realloc(l->output, output_size * sizeof(float));
-    l->delta = realloc(l->delta, output_size * sizeof(float));
+    l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
 #ifdef GPU
    CHECK_CUDA(cudaFree((float *)l->indexes_gpu));
--- a/src/maxpool_layer.h
+++ b/src/maxpool_layer.h
@ -8,6 +8,9 @@
 typedef layer maxpool_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 image get_maxpool_image(maxpool_layer l);
 maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
 void resize_maxpool_layer(maxpool_layer *l, int w, int h);
@ -20,5 +23,8 @@ void backward_maxpool_layer_gpu(maxpool_layer l, network_state state);
 void cudnn_maxpool_setup(maxpool_layer *l);
 #endif // GPU
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/maxpool_layer_kernels.cu
+++ b/src/maxpool_layer_kernels.cu
@ -2,10 +2,8 @@
 #include "curand.h"
 #include "cublas_v2.h"
 extern "C" {
 #include "maxpool_layer.h"
 #include "cuda.h"
 }
 __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
 {
--- a/src/network.c
+++ b/src/network.c
@ -180,16 +180,16 @@ network make_network(int n)
 {
    network net = {0};
    net.n = n;
-    net.layers = calloc(net.n, sizeof(layer));
+    net.layers = (layer*)calloc(net.n, sizeof(layer));
-    net.seen = calloc(1, sizeof(uint64_t));
+    net.seen = (uint64_t*)calloc(1, sizeof(uint64_t));
 #ifdef GPU
-    net.input_gpu = calloc(1, sizeof(float *));
+    net.input_gpu = (float**)calloc(1, sizeof(float*));
-    net.truth_gpu = calloc(1, sizeof(float *));
+    net.truth_gpu = (float**)calloc(1, sizeof(float*));
-    net.input16_gpu = calloc(1, sizeof(float *));
+    net.input16_gpu = (float**)calloc(1, sizeof(float*));
-    net.output16_gpu = calloc(1, sizeof(float *));
+    net.output16_gpu = (float**)calloc(1, sizeof(float*));
-    net.max_input16_size = calloc(1, sizeof(size_t));
+    net.max_input16_size = (size_t*)calloc(1, sizeof(size_t));
-    net.max_output16_size = calloc(1, sizeof(size_t));
+    net.max_output16_size = (size_t*)calloc(1, sizeof(size_t));
 #endif
    return net;
 }
@ -300,8 +300,8 @@ float train_network_datum(network net, float *x, float *y)
 float train_network_sgd(network net, data d, int n)
 {
    int batch = net.batch;
-    float *X = calloc(batch*d.X.cols, sizeof(float));
+    float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
-    float *y = calloc(batch*d.y.cols, sizeof(float));
+    float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
    int i;
    float sum = 0;
@ -320,8 +320,8 @@ float train_network(network net, data d)
    assert(d.X.rows % net.batch == 0);
    int batch = net.batch;
    int n = d.X.rows / batch;
-    float *X = calloc(batch*d.X.cols, sizeof(float));
+    float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
-    float *y = calloc(batch*d.y.cols, sizeof(float));
+    float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
    int i;
    float sum = 0;
@ -389,11 +389,11 @@ int recalculate_workspace_size(network *net)
    }
    else {
        free(net->workspace);
-        net->workspace = calloc(1, workspace_size);
+        net->workspace = (float*)calloc(1, workspace_size);
    }
 #else
    free(net->workspace);
-    net->workspace = calloc(1, workspace_size);
+    net->workspace = (float*)calloc(1, workspace_size);
 #endif
    //fprintf(stderr, " Done!\n");
    return 0;
@ -495,19 +495,19 @@ int resize_network(network *net, int w, int h)
            net->input_pinned_cpu_flag = 1;
        else {
            cudaGetLastError(); // reset CUDA-error
-            net->input_pinned_cpu = calloc(size, sizeof(float));
+            net->input_pinned_cpu = (float*)calloc(size, sizeof(float));
            net->input_pinned_cpu_flag = 0;
        }
        printf(" CUDA allocate done! \n");
    }else {
        free(net->workspace);
-        net->workspace = calloc(1, workspace_size);
+        net->workspace = (float*)calloc(1, workspace_size);
        if(!net->input_pinned_cpu_flag)
-            net->input_pinned_cpu = realloc(net->input_pinned_cpu, size * sizeof(float));
+            net->input_pinned_cpu = (float*)realloc(net->input_pinned_cpu, size * sizeof(float));
    }
 #else
    free(net->workspace);
-    net->workspace = calloc(1, workspace_size);
+    net->workspace = (float*)calloc(1, workspace_size);
 #endif
    //fprintf(stderr, " Done!\n");
    return 0;
@ -534,7 +534,7 @@ detection_layer get_network_detection_layer(network net)
        }
    }
    fprintf(stderr, "Detection layer not found!!\n");
-    detection_layer l = {0};
+    detection_layer l = { (LAYER_TYPE)0 };
    return l;
 }
@ -632,11 +632,11 @@ detection *make_network_boxes(network *net, float thresh, int *num)
    int i;
    int nboxes = num_detections(net, thresh);
    if (num) *num = nboxes;
-    detection *dets = calloc(nboxes, sizeof(detection));
+    detection* dets = (detection*)calloc(nboxes, sizeof(detection));
    for (i = 0; i < nboxes; ++i) {
-        dets[i].prob = calloc(l.classes, sizeof(float));
+        dets[i].prob = (float*)calloc(l.classes, sizeof(float));
        if (l.coords > 4) {
-            dets[i].mask = calloc(l.coords - 4, sizeof(float));
+            dets[i].mask = (float*)calloc(l.coords - 4, sizeof(float));
        }
    }
    return dets;
@ -645,10 +645,10 @@ detection *make_network_boxes(network *net, float thresh, int *num)
 void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter)
 {
-    box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
+    box* boxes = (box*)calloc(l.w * l.h * l.n, sizeof(box));
-    float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
+    float** probs = (float**)calloc(l.w * l.h * l.n, sizeof(float*));
    int i, j;
-    for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float));
+    for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
    get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map);
    for (j = 0; j < l.w*l.h*l.n; ++j) {
        dets[j].classes = l.classes;
@ -789,7 +789,7 @@ matrix network_predict_data_multi(network net, data test, int n)
    int i,j,b,m;
    int k = get_network_output_size(net);
    matrix pred = make_matrix(test.X.rows, k);
-    float *X = calloc(net.batch*test.X.rows, sizeof(float));
+    float* X = (float*)calloc(net.batch * test.X.rows, sizeof(float));
    for(i = 0; i < test.X.rows; i += net.batch){
        for(b = 0; b < net.batch; ++b){
            if(i+b == test.X.rows) break;
@ -814,7 +814,7 @@ matrix network_predict_data(network net, data test)
    int i,j,b;
    int k = get_network_output_size(net);
    matrix pred = make_matrix(test.X.rows, k);
-    float *X = calloc(net.batch*test.X.cols, sizeof(float));
+    float* X = (float*)calloc(net.batch * test.X.cols, sizeof(float));
    for(i = 0; i < test.X.rows; i += net.batch){
        for(b = 0; b < net.batch; ++b){
            if(i+b == test.X.rows) break;
--- a/src/network.h
+++ b/src/network.h
@ -6,14 +6,14 @@
 #include <stdint.h>
 #include "layer.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include "image.h"
 #include "data.h"
 #include "tree.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*
 typedef enum {
    CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@ -3,7 +3,6 @@
 //#include "cublas_v2.h"
 #include "cuda.h"
 extern "C" {
 #include <stdio.h>
 #include <time.h>
 #include <assert.h>
@ -35,10 +34,9 @@ extern "C" {
 #include "route_layer.h"
 #include "shortcut_layer.h"
 #include "blas.h"
 }
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 #include "http_stream.h"
@ -396,9 +394,11 @@ void sync_nets(network *nets, int n, int interval)
 float train_networks(network *nets, int n, data d, int interval)
 {
    int i;
 #ifdef _DEBUG
    int batch = nets[0].batch;
    int subdivisions = nets[0].subdivisions;
    assert(batch * subdivisions * n == d.X.rows);
 #endif
    pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
    float *errors = (float *) calloc(n, sizeof(float));
--- a/src/nightmare.c
+++ b/src/nightmare.c
@ -5,7 +5,7 @@
 #include "utils.h"
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 // ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2
--- a/src/normalization_layer.c
+++ b/src/normalization_layer.c
@ -5,7 +5,7 @@
 layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
 {
    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
-    layer layer = {0};
+    layer layer = { (LAYER_TYPE)0 };
    layer.type = NORMALIZATION;
    layer.batch = batch;
    layer.h = layer.out_h = h;
@ -15,10 +15,10 @@ layer make_normalization_layer(int batch, int w, int h, int c, int size, float a
    layer.size = size;
    layer.alpha = alpha;
    layer.beta = beta;
-    layer.output = calloc(h * w * c * batch, sizeof(float));
+    layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
-    layer.delta = calloc(h * w * c * batch, sizeof(float));
+    layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
-    layer.squared = calloc(h * w * c * batch, sizeof(float));
+    layer.squared = (float*)calloc(h * w * c * batch, sizeof(float));
-    layer.norms = calloc(h * w * c * batch, sizeof(float));
+    layer.norms = (float*)calloc(h * w * c * batch, sizeof(float));
    layer.inputs = w*h*c;
    layer.outputs = layer.inputs;
@ -46,10 +46,10 @@ void resize_normalization_layer(layer *layer, int w, int h)
    layer->out_w = w;
    layer->inputs = w*h*c;
    layer->outputs = layer->inputs;
-    layer->output = realloc(layer->output, h * w * c * batch * sizeof(float));
+    layer->output = (float*)realloc(layer->output, h * w * c * batch * sizeof(float));
-    layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float));
+    layer->delta = (float*)realloc(layer->delta, h * w * c * batch * sizeof(float));
-    layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float));
+    layer->squared = (float*)realloc(layer->squared, h * w * c * batch * sizeof(float));
-    layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float));
+    layer->norms = (float*)realloc(layer->norms, h * w * c * batch * sizeof(float));
 #ifdef GPU
    cuda_free(layer->output_gpu);
    cuda_free(layer->delta_gpu); 
--- a/src/normalization_layer.h
+++ b/src/normalization_layer.h
@ -5,6 +5,9 @@
 #include "layer.h"
 #include "network.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
 void resize_normalization_layer(layer *layer, int h, int w);
 void forward_normalization_layer(const layer layer, network_state state);
@ -16,4 +19,7 @@ void forward_normalization_layer_gpu(const layer layer, network_state state);
 void backward_normalization_layer_gpu(const layer layer, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/option_list.c
+++ b/src/option_list.c
@ -3,6 +3,7 @@
 #include <string.h>
 #include "option_list.h"
 #include "utils.h"
 #include "data.h"
 list *read_data_cfg(char *filename)
 {
@ -71,7 +72,7 @@ int read_option(char *s, list *options)
 void option_insert(list *l, char *key, char *val)
 {
-    kvp *p = malloc(sizeof(kvp));
+    kvp* p = (kvp*)malloc(sizeof(kvp));
    p->key = key;
    p->val = val;
    p->used = 0;
--- a/src/option_list.h
+++ b/src/option_list.h
@ -9,6 +9,9 @@ typedef struct{
    int used;
 } kvp;
 #ifdef __cplusplus
 extern "C" {
 #endif
 list *read_data_cfg(char *filename);
 int read_option(char *s, list *options);
@ -28,4 +31,7 @@ void option_unused(list *l);
 //LIB_API metadata get_metadata(char *file);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/parser.c
+++ b/src/parser.c
@ -272,7 +272,7 @@ int *parse_yolo_mask(char *a, int *num)
        for (i = 0; i < len; ++i) {
            if (a[i] == ',') ++n;
        }
-        mask = calloc(n, sizeof(int));
+        mask = (int*)calloc(n, sizeof(int));
        for (i = 0; i < n; ++i) {
            int val = atoi(a);
            mask[i] = val;
@ -587,8 +587,8 @@ route_layer parse_route(list *options, size_params params, network net)
        if (l[i] == ',') ++n;
    }
-    int *layers = calloc(n, sizeof(int));
+    int* layers = (int*)calloc(n, sizeof(int));
-    int *sizes = calloc(n, sizeof(int));
+    int* sizes = (int*)calloc(n, sizeof(int));
    for(i = 0; i < n; ++i){
        int index = atoi(l);
        l = strchr(l, ',')+1;
@ -693,8 +693,8 @@ void parse_net_options(list *options, network *net)
        for(i = 0; i < len; ++i){
            if (l[i] == ',') ++n;
        }
-        int *steps = calloc(n, sizeof(int));
+        int* steps = (int*)calloc(n, sizeof(int));
-        float *scales = calloc(n, sizeof(float));
+        float* scales = (float*)calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            int step    = atoi(l);
            float scale = atof(p);
@ -765,7 +765,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
        fprintf(stderr, "%4d ", count);
        s = (section *)n->val;
        options = s->options;
-        layer l = {0};
+        layer l = { (LAYER_TYPE)0 };
        LAYER_TYPE lt = string_to_layer_type(s->type);
        if(lt == CONVOLUTIONAL){
            l = parse_convolutional(options, params);
@ -864,7 +864,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
        if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
        else {
            cudaGetLastError(); // reset CUDA-error
-            net.input_pinned_cpu = calloc(size, sizeof(float));
+            net.input_pinned_cpu = (float*)calloc(size, sizeof(float));
        }
        // pre-allocate memory for inference on Tensor Cores (fp16)
@ -879,12 +879,12 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
            net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
        }
        else {
-            net.workspace = calloc(1, workspace_size);
+            net.workspace = (float*)calloc(1, workspace_size);
        }
    }
 #else
        if (workspace_size) {
-            net.workspace = calloc(1, workspace_size);
+            net.workspace = (float*)calloc(1, workspace_size);
        }
 #endif
@ -911,7 +911,7 @@ list *read_cfg(char *filename)
        strip(line);
        switch(line[0]){
            case '[':
-                current = malloc(sizeof(section));
+                current = (section*)malloc(sizeof(section));
                list_insert(sections, current);
                current->options = make_list();
                current->type = line;
@ -1091,7 +1091,7 @@ void save_weights(network net, char *filename)
 void transpose_matrix(float *a, int rows, int cols)
 {
-    float *transpose = calloc(rows*cols, sizeof(float));
+    float* transpose = (float*)calloc(rows * cols, sizeof(float));
    int x, y;
    for(x = 0; x < rows; ++x){
        for(y = 0; y < cols; ++y){
@ -1313,7 +1313,7 @@ void load_weights(network *net, char *filename)
 network *load_network_custom(char *cfg, char *weights, int clear, int batch)
 {
    printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
-    network *net = calloc(1, sizeof(network));
+    network* net = (network*)calloc(1, sizeof(network));
    *net = parse_network_cfg_custom(cfg, batch, 0);
    if (weights && weights[0] != 0) {
        load_weights(net, weights);
@ -1326,7 +1326,7 @@ network *load_network_custom(char *cfg, char *weights, int clear, int batch)
 network *load_network(char *cfg, char *weights, int clear)
 {
    printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
-    network *net = calloc(1, sizeof(network));
+    network* net = (network*)calloc(1, sizeof(network));
    *net = parse_network_cfg(cfg);
    if (weights && weights[0] != 0) {
        load_weights(net, weights);
--- a/src/parser.h
+++ b/src/parser.h
@ -2,6 +2,9 @@
 #define PARSER_H
 #include "network.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 network parse_network_cfg(char *filename);
 network parse_network_cfg_custom(char *filename, int batch, int time_steps);
 void save_network(network net, char *filename);
@ -11,4 +14,7 @@ void save_weights_double(network net, char *filename);
 void load_weights(network *net, char *filename);
 void load_weights_upto(network *net, char *filename, int cutoff);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/region_layer.c
+++ b/src/region_layer.c
@ -9,11 +9,10 @@
 #include <string.h>
 #include <stdlib.h>
 #define DOABS 1
 region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes)
 {
-    region_layer l = {0};
+    region_layer l = { (LAYER_TYPE)0 };
    l.type = REGION;
    l.n = n;
@ -22,15 +21,15 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int
    l.w = w;
    l.classes = classes;
    l.coords = coords;
-    l.cost = calloc(1, sizeof(float));
+    l.cost = (float*)calloc(1, sizeof(float));
-    l.biases = calloc(n*2, sizeof(float));
+    l.biases = (float*)calloc(n * 2, sizeof(float));
-    l.bias_updates = calloc(n*2, sizeof(float));
+    l.bias_updates = (float*)calloc(n * 2, sizeof(float));
    l.outputs = h*w*n*(classes + coords + 1);
    l.inputs = l.outputs;
    l.max_boxes = max_boxes;
    l.truths = max_boxes*(5);
-    l.delta = calloc(batch*l.outputs, sizeof(float));
+    l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
-    l.output = calloc(batch*l.outputs, sizeof(float));
+    l.output = (float*)calloc(batch * l.outputs, sizeof(float));
    int i;
    for(i = 0; i < n*2; ++i){
        l.biases[i] = .5;
@ -61,8 +60,8 @@ void resize_region_layer(layer *l, int w, int h)
    l->outputs = h*w*l->n*(l->classes + l->coords + 1);
    l->inputs = l->outputs;
-    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
+    l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
-    l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
+    l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
 #ifdef GPU
    if (old_w < w || old_h < h) {
@ -444,11 +443,11 @@ void forward_region_layer_gpu(const region_layer l, network_state state)
        softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5);
    }
-    float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
+    float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
    float *truth_cpu = 0;
    if(state.truth){
        int num_truth = l.batch*l.truths;
-        truth_cpu = calloc(num_truth, sizeof(float));
+        truth_cpu = (float*)calloc(num_truth, sizeof(float));
        cuda_pull_array(state.truth, truth_cpu, num_truth);
    }
    cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs);
--- a/src/region_layer.h
+++ b/src/region_layer.h
@ -6,6 +6,9 @@
 typedef layer region_layer;
 #ifdef __cplusplus
 extern "C" {
 #endif
 region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords, int max_boxes);
 void forward_region_layer(const region_layer l, network_state state);
 void backward_region_layer(const region_layer l, network_state state);
@ -20,4 +23,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state);
 void backward_region_layer_gpu(region_layer l, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/src/reorg_layer.c
+++ b/src/reorg_layer.c
@ -6,7 +6,7 @@
 layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
 {
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
    l.type = REORG;
    l.batch = batch;
    l.stride = stride;
@ -27,8 +27,8 @@ layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = h*w*c;
    int output_size = l.out_h * l.out_w * l.out_c * batch;
-    l.output =  calloc(output_size, sizeof(float));
+    l.output = (float*)calloc(output_size, sizeof(float));
-    l.delta =   calloc(output_size, sizeof(float));
+    l.delta = (float*)calloc(output_size, sizeof(float));
    l.forward = forward_reorg_layer;
    l.backward = backward_reorg_layer;
@ -64,8 +64,8 @@ void resize_reorg_layer(layer *l, int w, int h)
    l->inputs = l->outputs;
    int output_size = l->outputs * l->batch;
-    l->output = realloc(l->output, output_size * sizeof(float));
+    l->output = (float*)realloc(l->output, output_size * sizeof(float));
-    l->delta = realloc(l->delta, output_size * sizeof(float));
+    l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
 #ifdef GPU
    cuda_free(l->output_gpu);
--- a/src/reorg_layer.h
+++ b/src/reorg_layer.h
@ -6,6 +6,9 @@
 #include "layer.h"
 #include "network.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse);
 void resize_reorg_layer(layer *l, int w, int h);
 void forward_reorg_layer(const layer l, network_state state);
@ -16,5 +19,8 @@ void forward_reorg_layer_gpu(layer l, network_state state);
 void backward_reorg_layer_gpu(layer l, network_state state);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/Show More
+++ b/Show More