From b3579380dc763135c60251a58a5c46e3ec124ccc Mon Sep 17 00:00:00 2001
From: Stefano Sinigardi <stesinigardi@hotmail.com>
Date: Thu, 14 Feb 2019 17:28:23 +0100
Subject: [PATCH] improve compatibility with C++ compilers, prepare for CMake

---
 .gitignore                     |    5 +
 include/darknet.h              |   32 +
 include/yolo_v2_class.hpp      |   24 +-
 src/activation_kernels.cu      |    2 -
 src/activation_layer.c         |    6 +-
 src/activation_layer.h         |    6 +
 src/activations.h              |    6 +
 src/art.c                      |    7 +-
 src/avgpool_layer.c            |    6 +-
 src/avgpool_layer.h            |    6 +
 src/avgpool_layer_kernels.cu   |    2 -
 src/batchnorm_layer.c          |   18 +-
 src/batchnorm_layer.h          |    6 +
 src/blas.c                     |    2 +-
 src/blas.h                     |   12 +-
 src/blas_kernels.cu            |    2 -
 src/box.c                      |    2 +-
 src/box.h                      |    6 +
 src/captcha.c                  |   14 +-
 src/cifar.c                    |    6 +-
 src/classifier.c               |   44 +-
 src/classifier.h               |   10 +
 src/coco.c                     |   36 +-
 src/col2im.c                   |    1 +
 src/col2im.h                   |    6 +
 src/col2im_kernels.cu          |    2 -
 src/compare.c                  |   12 +-
 src/connected_layer.c          |   34 +-
 src/connected_layer.h          |    7 +
 src/convolutional_kernels.cu   |   17 +-
 src/convolutional_layer.c      |  101 +-
 src/convolutional_layer.h      |    8 +-
 src/cost_layer.c               |   12 +-
 src/cost_layer.h               |    6 +
 src/cpu_gemm.c                 |    6 +-
 src/crnn_layer.c               |   13 +-
 src/crnn_layer.h               |    6 +
 src/crop_layer.c               |    6 +-
 src/crop_layer.h               |    6 +
 src/crop_layer_kernels.cu      |    2 -
 src/cuda.c                     |   14 +-
 src/cuda.h                     |   31 +-
 src/darknet.c                  |    8 +-
 src/{unistd.h => darkunistd.h} |   16 +-
 src/data.c                     |   71 +-
 src/data.h                     |   11 +-
 src/deconvolutional_kernels.cu |    4 +-
 src/deconvolutional_layer.c    |   24 +-
 src/deconvolutional_layer.h    |   10 +-
 src/demo.c                     |   43 +-
 src/demo.h                     |   10 +-
 src/detection_layer.c          |   14 +-
 src/detection_layer.h          |    6 +
 src/detector.c                 |   72 +-
 src/dice.c                     |    2 +-
 src/dropout_layer.c            |    6 +-
 src/dropout_layer.h            |    6 +
 src/dropout_layer_kernels.cu   |    2 -
 src/gemm.c                     |   66 +-
 src/gemm.h                     |    7 +
 src/getopt.c                   | 1690 +++++++++-----------------------
 src/getopt.h                   |  317 +++---
 src/gettimeofday.c             |   72 +-
 src/gettimeofday.h             |   51 +-
 src/go.c                       |   67 +-
 src/gru_layer.c                |   32 +-
 src/gru_layer.h                |    6 +
 src/http_stream.cpp            |   73 +-
 src/http_stream.h              |   11 +-
 src/im2col.h                   |    8 +
 src/im2col_kernels.cu          |   22 +-
 src/image.c                    |   45 +-
 src/image.h                    |   22 +-
 src/layer.c                    |    2 +-
 src/layer.h                    |    7 +-
 src/list.c                     |   22 +-
 src/list.h                     |    6 +
 src/local_layer.c              |   16 +-
 src/local_layer.h              |    6 +
 src/lstm_layer.c               |   50 +-
 src/lstm_layer.h               |    8 +-
 src/matrix.c                   |   28 +-
 src/matrix.h                   |    6 +
 src/maxpool_layer.c            |   14 +-
 src/maxpool_layer.h            |    6 +
 src/maxpool_layer_kernels.cu   |    2 -
 src/network.c                  |   54 +-
 src/network.h                  |    6 +-
 src/network_kernels.cu         |    6 +-
 src/nightmare.c                |    2 +-
 src/normalization_layer.c      |   18 +-
 src/normalization_layer.h      |    6 +
 src/option_list.c              |    3 +-
 src/option_list.h              |    6 +
 src/parser.c                   |   26 +-
 src/parser.h                   |    6 +
 src/region_layer.c             |   21 +-
 src/region_layer.h             |    6 +
 src/reorg_layer.c              |   10 +-
 src/reorg_layer.h              |    6 +
 src/reorg_old_layer.c          |   10 +-
 src/reorg_old_layer.h          |    6 +
 src/rnn.c                      |   38 +-
 src/rnn_layer.c                |   10 +-
 src/rnn_layer.h                |    6 +
 src/rnn_vid.c                  |   22 +-
 src/route_layer.c              |   10 +-
 src/route_layer.h              |    6 +
 src/shortcut_layer.c           |   10 +-
 src/shortcut_layer.h           |    6 +
 src/softmax_layer.c            |   10 +-
 src/softmax_layer.h            |    6 +
 src/super.c                    |    6 +-
 src/swag.c                     |    4 +-
 src/tag.c                      |    6 +-
 src/tree.c                     |   20 +-
 src/tree.h                     |    6 +
 src/upsample_layer.c           |   10 +-
 src/upsample_layer.h           |    6 +
 src/utils.c                    |   40 +-
 src/utils.h                    |   29 +-
 src/voxel.c                    |   10 +-
 src/writing.c                  |    4 +-
 src/yolo.c                     |   46 +-
 src/yolo_console_dll.cpp       |   54 +-
 src/yolo_layer.c               |   30 +-
 src/yolo_layer.h               |    6 +
 src/yolo_v2_class.cpp          |   19 +-
 128 files changed, 1871 insertions(+), 2258 deletions(-)
 rename src/{unistd.h => darkunistd.h} (88%)

diff --git a/.gitignore b/.gitignore
index 1bd67d08..da2d106e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,8 @@ Thumbs.db
 # CMake #
 cmake-build-debug/
 CMakeLists.txt
+build_*/
+build.*
+cmake/
+*.patch
+.gitignore
diff --git a/include/darknet.h b/include/darknet.h
index 05607c42..e35524de 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -26,6 +26,38 @@
 #endif
 #endif
 
+#ifdef _WIN32
+#define PORT unsigned long
+#define ADDRPOINTER int*
+#else
+#define PORT unsigned short
+#define SOCKET int
+#define HOSTENT struct hostent
+#define SOCKADDR struct sockaddr
+#define SOCKADDR_IN struct sockaddr_in
+#define ADDRPOINTER unsigned int*
+#define INVALID_SOCKET -1
+#define SOCKET_ERROR -1
+#endif
+#define FULL_MASK 0xffffffff
+#define WARP_SIZE 32
+#define BLOCK 512
+#define NUMCHARS 37
+#define NFRAMES 3
+#define BLOCK_TRANSPOSE32 256
+#define DOABS 1
+#define SECRET_NUM -1234
+#define C_SHARP_MAX_OBJECTS 1000
+#define TILE_M 4 // 4 ops
+#define TILE_N 16 // AVX2 = 2 ops * 8 floats
+#define TILE_K 16 // loop
+#ifndef __COMPAR_FN_T
+#define __COMPAR_FN_T
+typedef int (*__compar_fn_t)(const void*, const void*);
+#ifdef __USE_GNU
+typedef __compar_fn_t comparison_fn_t;
+#endif
+#endif
 #ifdef GPU
 #define BLOCK 512
 
diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp
index b6bea64a..f7f13bda 100644
--- a/include/yolo_v2_class.hpp
+++ b/include/yolo_v2_class.hpp
@@ -1,17 +1,7 @@
-#pragma once
-#ifdef LIB_EXPORTS
-#if defined(_MSC_VER)
-#define LIB_API __declspec(dllexport)
-#else
-#define LIB_API __attribute__((visibility("default")))
-#endif
-#else
-#if defined(_MSC_VER)
-#define LIB_API
-#else
-#define LIB_API
-#endif
-#endif
+#ifndef YOLO_V2_CLASS_HPP
+#define YOLO_V2_CLASS_HPP
+
+#include "darknet.h"
 
 struct bbox_t {
     unsigned int x, y, w, h;    // (x,y) - top-left corner, (w, h) - width & height of bounded box
@@ -28,7 +18,6 @@ struct image_t {
     float *data;                // pointer to the image data
 };
 
-#define C_SHARP_MAX_OBJECTS 1000
 struct bbox_t_container {
     bbox_t candidates[C_SHARP_MAX_OBJECTS];
 };
@@ -41,8 +30,8 @@ struct bbox_t_container {
 
 #ifdef OPENCV
 #include <opencv2/opencv.hpp>            // C++
-#include "opencv2/highgui/highgui_c.h"    // C
-#include "opencv2/imgproc/imgproc_c.h"    // C
+#include <opencv2/highgui/highgui_c.h>   // C
+#include <opencv2/imgproc/imgproc_c.h>   // C
 #endif    // OPENCV
 
 extern "C" LIB_API int init(const char *configurationFilename, const char *weightsFilename, int gpu);
@@ -658,3 +647,4 @@ void free_img(image_t m) {
 
 #endif    // __cplusplus
 */
+#endif
diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu
index aec279d5..0144ca51 100644
--- a/src/activation_kernels.cu
+++ b/src/activation_kernels.cu
@@ -3,10 +3,8 @@
 #include "curand.h"
 #include "cublas_v2.h"
 
-extern "C" {
 #include "activations.h"
 #include "cuda.h"
-}
 
 
 __device__ float lhtan_activate_kernel(float x)
diff --git a/src/activation_layer.c b/src/activation_layer.c
index 3430dac4..2c323b8d 100644
--- a/src/activation_layer.c
+++ b/src/activation_layer.c
@@ -11,15 +11,15 @@
 
 layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
 {
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.type = ACTIVE;
 
     l.inputs = inputs;
     l.outputs = inputs;
     l.batch=batch;
 
-    l.output = calloc(batch*inputs, sizeof(float*));
-    l.delta = calloc(batch*inputs, sizeof(float*));
+    l.output = (float*)calloc(batch * inputs, sizeof(float));
+    l.delta = (float*)calloc(batch * inputs, sizeof(float));
 
     l.forward = forward_activation_layer;
     l.backward = backward_activation_layer;
diff --git a/src/activation_layer.h b/src/activation_layer.h
index a09756aa..c766c6af 100644
--- a/src/activation_layer.h
+++ b/src/activation_layer.h
@@ -5,6 +5,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
 
 void forward_activation_layer(layer l, network_state state);
@@ -15,5 +18,8 @@ void forward_activation_layer_gpu(layer l, network_state state);
 void backward_activation_layer_gpu(layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/activations.h b/src/activations.h
index 849c65d3..4ecf97d7 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -8,6 +8,9 @@
 //    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
 //}ACTIVATION;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 ACTIVATION get_activation(char *s);
 
 char *get_activation_string(ACTIVATION a);
@@ -87,5 +90,8 @@ static inline float leaky_gradient(float x){return (x>0) ? 1 : .1f;}
 static inline float tanh_gradient(float x){return 1-x*x;}
 static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01f : .125f;}
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/art.c b/src/art.c
index 2a28370a..364cca3d 100644
--- a/src/art.c
+++ b/src/art.c
@@ -6,7 +6,6 @@
 #include "classifier.h"
 #ifdef WIN32
 #include <time.h>
-#include <winsock.h>
 #include "gettimeofday.h"
 #else
 #include <sys/time.h>
@@ -14,10 +13,10 @@
 
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/core/version.hpp"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
 image get_image_from_stream(CvCapture *cap);
 #endif
diff --git a/src/avgpool_layer.c b/src/avgpool_layer.c
index b6932fe7..983fe759 100644
--- a/src/avgpool_layer.c
+++ b/src/avgpool_layer.c
@@ -5,7 +5,7 @@
 avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
 {
     fprintf(stderr, "avg                     %4d x%4d x%4d   ->  %4d\n",  w, h, c, c);
-    avgpool_layer l = {0};
+    avgpool_layer l = { (LAYER_TYPE)0 };
     l.type = AVGPOOL;
     l.batch = batch;
     l.h = h;
@@ -17,8 +17,8 @@ avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
     l.outputs = l.out_c;
     l.inputs = h*w*c;
     int output_size = l.outputs * batch;
-    l.output =  calloc(output_size, sizeof(float));
-    l.delta =   calloc(output_size, sizeof(float));
+    l.output = (float*)calloc(output_size, sizeof(float));
+    l.delta = (float*)calloc(output_size, sizeof(float));
     l.forward = forward_avgpool_layer;
     l.backward = backward_avgpool_layer;
     #ifdef GPU
diff --git a/src/avgpool_layer.h b/src/avgpool_layer.h
index f8329aea..f7679aa9 100644
--- a/src/avgpool_layer.h
+++ b/src/avgpool_layer.h
@@ -8,6 +8,9 @@
 
 typedef layer avgpool_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 image get_avgpool_image(avgpool_layer l);
 avgpool_layer make_avgpool_layer(int batch, int w, int h, int c);
 void resize_avgpool_layer(avgpool_layer *l, int w, int h);
@@ -19,5 +22,8 @@ void forward_avgpool_layer_gpu(avgpool_layer l, network_state state);
 void backward_avgpool_layer_gpu(avgpool_layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/avgpool_layer_kernels.cu b/src/avgpool_layer_kernels.cu
index 784d6762..1bb48ae6 100644
--- a/src/avgpool_layer_kernels.cu
+++ b/src/avgpool_layer_kernels.cu
@@ -2,10 +2,8 @@
 #include "curand.h"
 #include "cublas_v2.h"
 
-extern "C" {
 #include "avgpool_layer.h"
 #include "cuda.h"
-}
 
 __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
 {
diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c
index a2870405..d6ce989e 100644
--- a/src/batchnorm_layer.c
+++ b/src/batchnorm_layer.c
@@ -5,29 +5,29 @@
 layer make_batchnorm_layer(int batch, int w, int h, int c)
 {
     fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
-    layer layer = {0};
+    layer layer = { (LAYER_TYPE)0 };
     layer.type = BATCHNORM;
     layer.batch = batch;
     layer.h = layer.out_h = h;
     layer.w = layer.out_w = w;
     layer.c = layer.out_c = c;
-    layer.output = calloc(h * w * c * batch, sizeof(float));
-    layer.delta  = calloc(h * w * c * batch, sizeof(float));
+    layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
+    layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
     layer.inputs = w*h*c;
     layer.outputs = layer.inputs;
 
-    layer.scales = calloc(c, sizeof(float));
-    layer.scale_updates = calloc(c, sizeof(float));
+    layer.scales = (float*)calloc(c, sizeof(float));
+    layer.scale_updates = (float*)calloc(c, sizeof(float));
     int i;
     for(i = 0; i < c; ++i){
         layer.scales[i] = 1;
     }
 
-    layer.mean = calloc(c, sizeof(float));
-    layer.variance = calloc(c, sizeof(float));
+    layer.mean = (float*)calloc(c, sizeof(float));
+    layer.variance = (float*)calloc(c, sizeof(float));
 
-    layer.rolling_mean = calloc(c, sizeof(float));
-    layer.rolling_variance = calloc(c, sizeof(float));
+    layer.rolling_mean = (float*)calloc(c, sizeof(float));
+    layer.rolling_variance = (float*)calloc(c, sizeof(float));
 
     layer.forward = forward_batchnorm_layer;
     layer.backward = backward_batchnorm_layer;
diff --git a/src/batchnorm_layer.h b/src/batchnorm_layer.h
index 99d1d0fe..1ea38d72 100644
--- a/src/batchnorm_layer.h
+++ b/src/batchnorm_layer.h
@@ -5,6 +5,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_batchnorm_layer(int batch, int w, int h, int c);
 void forward_batchnorm_layer(layer l, network_state state);
 void backward_batchnorm_layer(layer l, network_state state);
@@ -16,4 +19,7 @@ void pull_batchnorm_layer(layer l);
 void push_batchnorm_layer(layer l);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/blas.c b/src/blas.c
index ae84dc72..32d44cb5 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -34,7 +34,7 @@ void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride,
 
 void flatten(float *x, int size, int layers, int batch, int forward)
 {
-    float *swap = calloc(size*layers*batch, sizeof(float));
+    float* swap = (float*)calloc(size * layers * batch, sizeof(float));
     int i,c,b;
     for(b = 0; b < batch; ++b){
         for(c = 0; c < layers; ++c){
diff --git a/src/blas.h b/src/blas.h
index 9b7f3d5a..19b72b76 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -1,5 +1,12 @@
 #ifndef BLAS_H
 #define BLAS_H
+#ifdef GPU
+#include "cuda.h"
+#include "tree.h"
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
 void flatten(float *x, int size, int layers, int batch, int forward);
 void pm(int M, int N, float *A);
 float *random_matrix(int rows, int cols);
@@ -41,8 +48,6 @@ void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, i
 void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
 
 #ifdef GPU
-#include "cuda.h"
-#include "tree.h"
 
 void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
 void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
@@ -97,5 +102,8 @@ void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int for
 
 void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier);
 
+#endif
+#ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index c1444011..72d92129 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -3,12 +3,10 @@
 #include "cublas_v2.h"
 #include <assert.h>
 
-extern "C" {
 #include "blas.h"
 #include "cuda.h"
 #include "utils.h"
 #include "tree.h"
-}
 
 __global__ void scale_bias_kernel(float *output, float *biases, int n, int size)
 {
diff --git a/src/box.c b/src/box.c
index 718215fe..bc4958a2 100644
--- a/src/box.c
+++ b/src/box.c
@@ -249,7 +249,7 @@ int nms_comparator(const void *pa, const void *pb)
 void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh)
 {
     int i, j, k;
-    sortable_bbox *s = calloc(total, sizeof(sortable_bbox));
+    sortable_bbox* s = (sortable_bbox*)calloc(total, sizeof(sortable_bbox));
 
     for(i = 0; i < total; ++i){
         s[i].index = i;       
diff --git a/src/box.h b/src/box.h
index 0e6fd918..9d6aa4f3 100644
--- a/src/box.h
+++ b/src/box.h
@@ -27,6 +27,9 @@ typedef struct detection_with_class {
 	int best_class;
 } detection_with_class;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 box float_to_box(float *f);
 float box_iou(box a, box b);
 float box_rmse(box a, box b);
@@ -42,4 +45,7 @@ box encode_box(box b, box anchor);
 // Return number of selected detections in *selected_detections_num
 detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names);
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/captcha.c b/src/captcha.c
index 3d449b26..230c723f 100644
--- a/src/captcha.c
+++ b/src/captcha.c
@@ -41,11 +41,11 @@ void train_captcha(char *cfgfile, char *weightfile)
     int i = *net.seen/imgs;
     int solved = 1;
     list *plist;
-    char **labels = get_labels("/data/captcha/reimgs.labels.list");
+    char** labels = get_labels("data/captcha/reimgs.labels.list");
     if (solved){
-        plist = get_paths("/data/captcha/reimgs.solved.list");
+        plist = get_paths("data/captcha/reimgs.solved.list");
     }else{
-        plist = get_paths("/data/captcha/reimgs.raw.list");
+        plist = get_paths("data/captcha/reimgs.raw.list");
     }
     char **paths = (char **)list_to_array(plist);
     printf("%d\n", plist->size);
@@ -89,7 +89,7 @@ void train_captcha(char *cfgfile, char *weightfile)
         free_data(train);
         if(i%100==0){
             char buff[256];
-            sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);
+            sprintf(buff, "imagenet_backup/%s_%d.weights", base, i);
             save_weights(net, buff);
         }
     }
@@ -104,7 +104,7 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
     set_batch_network(&net, 1);
     srand(2222222);
     int i = 0;
-    char **names = get_labels("/data/captcha/reimgs.labels.list");
+    char** names = get_labels("data/captcha/reimgs.labels.list");
     char buff[256];
     char *input = buff;
     int indexes[26];
@@ -137,12 +137,12 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
 
 void valid_captcha(char *cfgfile, char *weightfile, char *filename)
 {
-    char **labels = get_labels("/data/captcha/reimgs.labels.list");
+    char** labels = get_labels("data/captcha/reimgs.labels.list");
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
         load_weights(&net, weightfile);
     }
-    list *plist = get_paths("/data/captcha/reimgs.fg.list");
+    list* plist = get_paths("data/captcha/reimgs.fg.list");
     char **paths = (char **)list_to_array(plist);
     int N = plist->size;
     int outputs = net.outputs;
diff --git a/src/cifar.c b/src/cifar.c
index 4a27e795..a9daa979 100644
--- a/src/cifar.c
+++ b/src/cifar.c
@@ -5,7 +5,7 @@
 #include "blas.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 void train_cifar(char *cfgfile, char *weightfile)
@@ -20,7 +20,7 @@ void train_cifar(char *cfgfile, char *weightfile)
     }
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
 
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     int classes = 10;
     int N = 50000;
 
@@ -68,7 +68,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile)
     }
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
 
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     int classes = 10;
     int N = 50000;
 
diff --git a/src/classifier.c b/src/classifier.c
index 11b6968d..24b956d7 100644
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -8,20 +8,18 @@
 #include "cuda.h"
 #ifdef WIN32
 #include <time.h>
-#include <winsock.h>
 #include "gettimeofday.h"
 #else
 #include <sys/time.h>
 #endif
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/core/version.hpp"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
-image get_image_from_stream(CvCapture *cap);
-image get_image_from_stream_cpp(CvCapture *cap);
+#include "image.h"
 #include "http_stream.h"
 IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
 
@@ -34,7 +32,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
 
 float *get_regression_values(char **labels, int n)
 {
-    float *v = calloc(n, sizeof(float));
+    float* v = (float*)calloc(n, sizeof(float));
     int i;
     for(i = 0; i < n; ++i){
         char *p = strchr(labels[i], ' ');
@@ -52,7 +50,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
     printf("%d\n", ngpus);
-    network *nets = calloc(ngpus, sizeof(network));
+    network* nets = (network*)calloc(ngpus, sizeof(network));
 
     srand(time(0));
     int seed = rand();
@@ -431,7 +429,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
 
     float avg_acc = 0;
     float avg_topk = 0;
-    int *indexes = calloc(topk, sizeof(int));
+    int* indexes = (int*)calloc(topk, sizeof(int));
 
     for(i = 0; i < m; ++i){
         int class_id = -1;
@@ -458,7 +456,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
         images[7] = crop_image(im, 0, 0, w, h);
         images[8] = crop_image(im, -shift, shift, w, h);
         images[9] = crop_image(im, shift, shift, w, h);
-        float *pred = calloc(classes, sizeof(float));
+        float* pred = (float*)calloc(classes, sizeof(float));
         for(j = 0; j < 10; ++j){
             float *p = network_predict(net, images[j].data);
             if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
@@ -504,7 +502,7 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
 
     float avg_acc = 0;
     float avg_topk = 0;
-    int *indexes = calloc(topk, sizeof(int));
+    int* indexes = (int*)calloc(topk, sizeof(int));
 
     int size = net.w;
     for(i = 0; i < m; ++i){
@@ -581,7 +579,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
 
     float avg_acc = 0;
     float avg_topk = 0;
-    int *indexes = calloc(topk, sizeof(int));
+    int* indexes = (int*)calloc(topk, sizeof(int));
 
     for(i = 0; i < m; ++i){
         int class_id = -1;
@@ -651,7 +649,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
 
     float avg_acc = 0;
     float avg_topk = 0;
-    int *indexes = calloc(topk, sizeof(int));
+    int* indexes = (int*)calloc(topk, sizeof(int));
 
     for(i = 0; i < m; ++i){
         int class_id = -1;
@@ -662,7 +660,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
                 break;
             }
         }
-        float *pred = calloc(classes, sizeof(float));
+        float* pred = (float*)calloc(classes, sizeof(float));
         image im = load_image_color(paths[i], 0, 0);
         for(j = 0; j < nscales; ++j){
             image r = resize_min(im, scales[j]);
@@ -707,7 +705,7 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena
     int i = 0;
     char **names = get_labels(name_list);
     clock_t time;
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
     char buff[256];
     char *input = buff;
     while(1){
@@ -790,7 +788,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
     int i = 0;
     char **names = get_labels(name_list);
     clock_t time;
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
     char buff[256];
     char *input = buff;
     int size = net.w;
@@ -973,7 +971,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
     char *name_list = option_find_str(options, "names", 0);
     char **names = get_labels(name_list);
 
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
 
     if(!cap) error("Couldn't connect to webcam.\n");
     //cvNamedWindow("Threat", CV_WINDOW_NORMAL);
@@ -1051,11 +1049,13 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
         }
         top_predictions(net, top, indexes);
         char buff[256];
-        sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count);
+        sprintf(buff, "tmp/threat_%06d", count);
         //save_image(out, buff);
 
+#ifndef _WIN32
         printf("\033[2J");
         printf("\033[1;1H");
+#endif
         printf("\nFPS:%.0f\n",fps);
 
         for(i = 0; i < top; ++i){
@@ -1111,7 +1111,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
     char *name_list = option_find_str(options, "names", 0);
     char **names = get_labels(name_list);
 
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
 
     if(!cap) error("Couldn't connect to webcam.\n");
     cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL);
@@ -1193,7 +1193,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
     char *name_list = option_find_str(options, "names", 0);
     char **names = get_labels(name_list);
 
-    int *indexes = calloc(top, sizeof(int));
+    int* indexes = (int*)calloc(top, sizeof(int));
 
     if(!cap) error("Couldn't connect to webcam.\n");
     cvNamedWindow("Classifier", CV_WINDOW_NORMAL);
@@ -1214,8 +1214,10 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
         if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1);
         top_predictions(net, top, indexes);
 
+#ifndef _WIN32
         printf("\033[2J");
         printf("\033[1;1H");
+#endif
         printf("\nFPS:%.0f\n",fps);
 
         for(i = 0; i < top; ++i){
@@ -1257,7 +1259,7 @@ void run_classifier(int argc, char **argv)
         for(i = 0; i < len; ++i){
             if (gpu_list[i] == ',') ++ngpus;
         }
-        gpus = calloc(ngpus, sizeof(int));
+        gpus = (int*)calloc(ngpus, sizeof(int));
         for(i = 0; i < ngpus; ++i){
             gpus[i] = atoi(gpu_list);
             gpu_list = strchr(gpu_list, ',')+1;
diff --git a/src/classifier.h b/src/classifier.h
index 3c89f49c..d94417d3 100644
--- a/src/classifier.h
+++ b/src/classifier.h
@@ -1,2 +1,12 @@
+#ifndef CLASSIFIER_H
+#define CLASSIFIER_H
 
+#include "list.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
 list *read_data_cfg(char *filename);
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/coco.c b/src/coco.c
index aff8e5a0..e3cfccea 100644
--- a/src/coco.c
+++ b/src/coco.c
@@ -9,7 +9,7 @@
 #include "demo.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};
@@ -22,7 +22,7 @@ void train_coco(char *cfgfile, char *weightfile)
     //char *train_images = "/home/pjreddie/data/coco/train.txt";
     char *train_images = "data/coco.trainval.txt";
     //char *train_images = "data/bags.train.list";
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     srand(time(0));
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
@@ -164,9 +164,9 @@ void validate_coco(char *cfgfile, char *weightfile)
     FILE *fp = fopen(buff, "w");
     fprintf(fp, "[\n");
 
-    box *boxes = calloc(side*side*l.n, sizeof(box));
-    float **probs = calloc(side*side*l.n, sizeof(float *));
-    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
+    box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
+    float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
+    for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
 
     int m = plist->size;
     int i=0;
@@ -177,11 +177,11 @@ void validate_coco(char *cfgfile, char *weightfile)
     float iou_thresh = .5;
 
     int nthreads = 8;
-    image *val = calloc(nthreads, sizeof(image));
-    image *val_resized = calloc(nthreads, sizeof(image));
-    image *buf = calloc(nthreads, sizeof(image));
-    image *buf_resized = calloc(nthreads, sizeof(image));
-    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+    image* val = (image*)calloc(nthreads, sizeof(image));
+    image* val_resized = (image*)calloc(nthreads, sizeof(image));
+    image* buf = (image*)calloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
+    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
 
     load_args args = {0};
     args.w = net.w;
@@ -240,7 +240,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
     srand(time(0));
 
     char *base = "results/comp4_det_test_";
-    list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt");
+    list* plist = get_paths("data/voc/test/2007_test.txt");
     char **paths = (char **)list_to_array(plist);
 
     layer l = net.layers[net.n-1];
@@ -248,15 +248,15 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
     int side = l.side;
 
     int j, k;
-    FILE **fps = calloc(classes, sizeof(FILE *));
+    FILE** fps = (FILE**)calloc(classes, sizeof(FILE*));
     for(j = 0; j < classes; ++j){
         char buff[1024];
         snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]);
         fps[j] = fopen(buff, "w");
     }
-    box *boxes = calloc(side*side*l.n, sizeof(box));
-    float **probs = calloc(side*side*l.n, sizeof(float *));
-    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
+    box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
+    float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
+    for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
 
     int m = plist->size;
     int i=0;
@@ -328,9 +328,9 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
     char buff[256];
     char *input = buff;
     int j;
-    box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
-    float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
-    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
+    box* boxes = (box*)calloc(l.side * l.side * l.n, sizeof(box));
+    float** probs = (float**)calloc(l.side * l.side * l.n, sizeof(float*));
+    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
     while(1){
         if(filename){
             strncpy(input, filename, 256);
diff --git a/src/col2im.c b/src/col2im.c
index 5c4605e1..43126423 100644
--- a/src/col2im.c
+++ b/src/col2im.c
@@ -1,5 +1,6 @@
 #include <stdio.h>
 #include <math.h>
+#include "col2im.h"
 void col2im_add_pixel(float *im, int height, int width, int channels,
                         int row, int col, int channel, int pad, float val)
 {
diff --git a/src/col2im.h b/src/col2im.h
index 02374972..a8493e38 100644
--- a/src/col2im.h
+++ b/src/col2im.h
@@ -1,6 +1,9 @@
 #ifndef COL2IM_H
 #define COL2IM_H
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 void col2im_cpu(float* data_col,
         int channels, int height, int width,
         int ksize, int stride, int pad, float* data_im);
@@ -10,4 +13,7 @@ void col2im_ongpu(float *data_col,
         int channels, int height, int width,
         int ksize, int stride, int pad, float *data_im);
 #endif
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/col2im_kernels.cu b/src/col2im_kernels.cu
index 0fe61e49..34808c00 100644
--- a/src/col2im_kernels.cu
+++ b/src/col2im_kernels.cu
@@ -2,10 +2,8 @@
 #include "curand.h"
 #include "cublas_v2.h"
 
-extern "C" {
 #include "col2im.h"
 #include "cuda.h"
-}
 
 // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
 // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE
diff --git a/src/compare.c b/src/compare.c
index 803d812d..d0d1f1f0 100644
--- a/src/compare.c
+++ b/src/compare.c
@@ -12,7 +12,7 @@ void train_compare(char *cfgfile, char *weightfile)
     srand(time(0));
     float avg_loss = -1;
     char *base = basecfg(cfgfile);
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     printf("%s\n", base);
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
@@ -176,7 +176,7 @@ int bbox_comparator(const void *a, const void *b)
 
     image im1 = load_image_color(box1.filename, net.w, net.h);
     image im2 = load_image_color(box2.filename, net.w, net.h);
-    float *X  = calloc(net.w*net.h*net.c, sizeof(float));
+    float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
     memcpy(X,                   im1.data, im1.w*im1.h*im1.c*sizeof(float));
     memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
     float *predictions = network_predict(net, X);
@@ -205,7 +205,7 @@ void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, in
 {
     image im1 = load_image_color(a->filename, net.w, net.h);
     image im2 = load_image_color(b->filename, net.w, net.h);
-    float *X  = calloc(net.w*net.h*net.c, sizeof(float));
+    float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
     memcpy(X,                   im1.data, im1.w*im1.h*im1.c*sizeof(float));
     memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
     float *predictions = network_predict(net, X);
@@ -239,7 +239,7 @@ void SortMaster3000(char *filename, char *weightfile)
     char **paths = (char **)list_to_array(plist);
     int N = plist->size;
     free_list(plist);
-    sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
+    sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
     printf("Sorting %d boxes...\n", N);
     for(i = 0; i < N; ++i){
         boxes[i].filename = paths[i];
@@ -274,13 +274,13 @@ void BattleRoyaleWithCheese(char *filename, char *weightfile)
     int N = plist->size;
     int total = N;
     free_list(plist);
-    sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
+    sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
     printf("Battling %d boxes...\n", N);
     for(i = 0; i < N; ++i){
         boxes[i].filename = paths[i];
         boxes[i].net = net;
         boxes[i].classes = classes;
-        boxes[i].elos = calloc(classes, sizeof(float));;
+        boxes[i].elos = (float*)calloc(classes, sizeof(float));
         for(j = 0; j < classes; ++j){
             boxes[i].elos[j] = 1500;
         }
diff --git a/src/connected_layer.c b/src/connected_layer.c
index e54837ac..3520e914 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -54,7 +54,7 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
 {
     int total_batch = batch*steps;
     int i;
-    connected_layer l = {0};
+    connected_layer l = { (LAYER_TYPE)0 };
     l.type = CONNECTED;
 
     l.inputs = inputs;
@@ -74,14 +74,14 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
     l.activation = activation;
     l.learning_rate_scale = 1;
 
-    l.output = calloc(total_batch*outputs, sizeof(float));
-    l.delta = calloc(total_batch*outputs, sizeof(float));
+    l.output = (float*)calloc(total_batch * outputs, sizeof(float));
+    l.delta = (float*)calloc(total_batch * outputs, sizeof(float));
 
-    l.weight_updates = calloc(inputs*outputs, sizeof(float));
-    l.bias_updates = calloc(outputs, sizeof(float));
+    l.weight_updates = (float*)calloc(inputs * outputs, sizeof(float));
+    l.bias_updates = (float*)calloc(outputs, sizeof(float));
 
-    l.weights = calloc(outputs*inputs, sizeof(float));
-    l.biases = calloc(outputs, sizeof(float));
+    l.weights = (float*)calloc(outputs * inputs, sizeof(float));
+    l.biases = (float*)calloc(outputs, sizeof(float));
 
     l.forward = forward_connected_layer;
     l.backward = backward_connected_layer;
@@ -98,22 +98,22 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
     }
 
     if(batch_normalize){
-        l.scales = calloc(outputs, sizeof(float));
-        l.scale_updates = calloc(outputs, sizeof(float));
+        l.scales = (float*)calloc(outputs, sizeof(float));
+        l.scale_updates = (float*)calloc(outputs, sizeof(float));
         for(i = 0; i < outputs; ++i){
             l.scales[i] = 1;
         }
 
-        l.mean = calloc(outputs, sizeof(float));
-        l.mean_delta = calloc(outputs, sizeof(float));
-        l.variance = calloc(outputs, sizeof(float));
-        l.variance_delta = calloc(outputs, sizeof(float));
+        l.mean = (float*)calloc(outputs, sizeof(float));
+        l.mean_delta = (float*)calloc(outputs, sizeof(float));
+        l.variance = (float*)calloc(outputs, sizeof(float));
+        l.variance_delta = (float*)calloc(outputs, sizeof(float));
 
-        l.rolling_mean = calloc(outputs, sizeof(float));
-        l.rolling_variance = calloc(outputs, sizeof(float));
+        l.rolling_mean = (float*)calloc(outputs, sizeof(float));
+        l.rolling_variance = (float*)calloc(outputs, sizeof(float));
 
-        l.x = calloc(total_batch*outputs, sizeof(float));
-        l.x_norm = calloc(total_batch*outputs, sizeof(float));
+        l.x = (float*)calloc(total_batch * outputs, sizeof(float));
+        l.x_norm = (float*)calloc(total_batch * outputs, sizeof(float));
     }
 
 #ifdef GPU
diff --git a/src/connected_layer.h b/src/connected_layer.h
index 3775e0a5..80805cc2 100644
--- a/src/connected_layer.h
+++ b/src/connected_layer.h
@@ -7,7 +7,11 @@
 
 typedef layer connected_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 connected_layer make_connected_layer(int batch, int steps, int inputs, int outputs, ACTIVATION activation, int batch_normalize);
+size_t get_connected_workspace_size(layer l);
 
 void forward_connected_layer(connected_layer layer, network_state state);
 void backward_connected_layer(connected_layer layer, network_state state);
@@ -23,5 +27,8 @@ void push_connected_layer(connected_layer layer);
 void pull_connected_layer(connected_layer layer);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 4293d5a0..05c847ce 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -3,10 +3,11 @@
 #include "cublas_v2.h"
 
 #ifdef CUDNN
+#ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "cudnn.lib")
 #endif
+#endif
 
-extern "C" {
 #include "convolutional_layer.h"
 #include "batchnorm_layer.h"
 #include "gemm.h"
@@ -15,7 +16,7 @@ extern "C" {
 #include "col2im.h"
 #include "utils.h"
 #include "cuda.h"
-}
+
 
 __global__ void binarize_kernel(float *x, int n, float *binary)
 {
@@ -73,7 +74,6 @@ void binarize_weights_gpu(float *weights, int n, int size, float *binary)
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
-#define WARP_SIZE 32
 
 __global__ void set_zero_kernel(float *src, int size)
 {
@@ -477,10 +477,10 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
                 simple_copy_ongpu(l.outputs*l.batch / 2, output16, l.x_gpu);
                 //copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1);
                 //cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream());
-                float one = 1;
-                float zero = 0;
+                float one = 1.0f;
+                float zero = 0.0f;
                 // Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth
-                // compared to FP32, it�s just that the statistics and value adjustment should be done in FP32.
+                // compared to FP32, it's just that the statistics and value adjustment should be done in FP32.
                 CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(cudnn_handle(),
                     CUDNN_BATCHNORM_SPATIAL,
                     &one,
@@ -639,8 +639,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
             //    l.mean_gpu = l.rolling_mean_gpu;
             //    l.variance_gpu = l.rolling_variance_gpu;
             //}
-            float one = 1;
-            float zero = 0;
+            float one = 1.0f;
+            float zero = 0.0f;
             CHECK_CUDNN(cudnnBatchNormalizationBackward(cudnn_handle(),
                 CUDNN_BATCHNORM_SPATIAL,
                 &one,
@@ -936,4 +936,3 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float
     }
 }
 */
-
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 21dd7231..9e2d106e 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -9,8 +9,10 @@
 #include <time.h>
 
 #ifdef CUDNN
+#ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "cudnn.lib")
 #endif
+#endif
 
 #ifdef AI2
 #include "xnor_layer.h"
@@ -288,7 +290,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
             l->weightDesc,
             l->convDesc,
             l->dstTensorDesc,
-            forward_algo,
+            (cudnnConvolutionFwdPreference_t)forward_algo,
             0,
             &l->fw_algo));
     CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
@@ -296,7 +298,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
             l->ddstTensorDesc,
             l->convDesc,
             l->dsrcTensorDesc,
-            backward_algo,
+            (cudnnConvolutionBwdDataPreference_t)backward_algo,
             0,
             &l->bd_algo));
     CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
@@ -304,7 +306,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
             l->ddstTensorDesc,
             l->convDesc,
             l->dweightDesc,
-            backward_filter,
+            (cudnnConvolutionBwdFilterPreference_t)backward_filter,
             0,
             &l->bf_algo));
 
@@ -328,7 +330,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
 {
     int total_batch = batch*steps;
     int i;
-    convolutional_layer l = {0};
+    convolutional_layer l = { (LAYER_TYPE)0 };
     l.type = CONVOLUTIONAL;
 
     l.index = index;
@@ -346,11 +348,11 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     l.batch_normalize = batch_normalize;
     l.learning_rate_scale = 1;
 
-    l.weights = calloc(c*n*size*size, sizeof(float));
-    l.weight_updates = calloc(c*n*size*size, sizeof(float));
+    l.weights = (float*)calloc(c * n * size * size, sizeof(float));
+    l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
 
-    l.biases = calloc(n, sizeof(float));
-    l.bias_updates = calloc(n, sizeof(float));
+    l.biases = (float*)calloc(n, sizeof(float));
+    l.bias_updates = (float*)calloc(n, sizeof(float));
 
     // float scale = 1./sqrt(size*size*c);
     float scale = sqrt(2./(size*size*c));
@@ -364,64 +366,64 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     l.inputs = l.w * l.h * l.c;
     l.activation = activation;
 
-    l.output = calloc(total_batch*l.outputs, sizeof(float));
-    l.delta  = calloc(total_batch*l.outputs, sizeof(float));
+    l.output = (float*)calloc(total_batch*l.outputs, sizeof(float));
+    l.delta  = (float*)calloc(total_batch*l.outputs, sizeof(float));
 
     l.forward = forward_convolutional_layer;
     l.backward = backward_convolutional_layer;
     l.update = update_convolutional_layer;
     if(binary){
-        l.binary_weights = calloc(c*n*size*size, sizeof(float));
-        l.cweights = calloc(c*n*size*size, sizeof(char));
-        l.scales = calloc(n, sizeof(float));
+        l.binary_weights = (float*)calloc(c * n * size * size, sizeof(float));
+        l.cweights = (char*)calloc(c * n * size * size, sizeof(char));
+        l.scales = (float*)calloc(n, sizeof(float));
     }
     if(xnor){
-        l.binary_weights = calloc(c*n*size*size, sizeof(float));
-        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
+        l.binary_weights = (float*)calloc(c * n * size * size, sizeof(float));
+        l.binary_input = (float*)calloc(l.inputs * l.batch, sizeof(float));
 
         int align = 32;// 8;
         int src_align = l.out_h*l.out_w;
         l.bit_align = src_align + (align - src_align % align);
 
-        l.mean_arr = calloc(l.n, sizeof(float));
+        l.mean_arr = (float*)calloc(l.n, sizeof(float));
 
         const size_t new_c = l.c / 32;
         size_t in_re_packed_input_size = new_c * l.w * l.h + 1;
-        l.bin_re_packed_input = calloc(in_re_packed_input_size, sizeof(uint32_t));
+        l.bin_re_packed_input = (uint32_t*)calloc(in_re_packed_input_size, sizeof(uint32_t));
 
         l.lda_align = 256;  // AVX2
         int k = l.size*l.size*l.c;
         size_t k_aligned = k + (l.lda_align - k%l.lda_align);
         size_t t_bit_input_size = k_aligned * l.bit_align / 8;
-        l.t_bit_input = calloc(t_bit_input_size, sizeof(char));
+        l.t_bit_input = (char*)calloc(t_bit_input_size, sizeof(char));
     }
 
     if(batch_normalize){
-        l.scales = calloc(n, sizeof(float));
-        l.scale_updates = calloc(n, sizeof(float));
+        l.scales = (float*)calloc(n, sizeof(float));
+        l.scale_updates = (float*)calloc(n, sizeof(float));
         for(i = 0; i < n; ++i){
             l.scales[i] = 1;
         }
 
-        l.mean = calloc(n, sizeof(float));
-        l.variance = calloc(n, sizeof(float));
+        l.mean = (float*)calloc(n, sizeof(float));
+        l.variance = (float*)calloc(n, sizeof(float));
 
-        l.mean_delta = calloc(n, sizeof(float));
-        l.variance_delta = calloc(n, sizeof(float));
+        l.mean_delta = (float*)calloc(n, sizeof(float));
+        l.variance_delta = (float*)calloc(n, sizeof(float));
 
-        l.rolling_mean = calloc(n, sizeof(float));
-        l.rolling_variance = calloc(n, sizeof(float));
-        l.x = calloc(total_batch*l.outputs, sizeof(float));
-        l.x_norm = calloc(total_batch*l.outputs, sizeof(float));
+        l.rolling_mean = (float*)calloc(n, sizeof(float));
+        l.rolling_variance = (float*)calloc(n, sizeof(float));
+        l.x = (float*)calloc(total_batch * l.outputs, sizeof(float));
+        l.x_norm = (float*)calloc(total_batch * l.outputs, sizeof(float));
     }
     if(adam){
         l.adam = 1;
-        l.m = calloc(c*n*size*size, sizeof(float));
-        l.v = calloc(c*n*size*size, sizeof(float));
-        l.bias_m = calloc(n, sizeof(float));
-        l.scale_m = calloc(n, sizeof(float));
-        l.bias_v = calloc(n, sizeof(float));
-        l.scale_v = calloc(n, sizeof(float));
+        l.m = (float*)calloc(c * n * size * size, sizeof(float));
+        l.v = (float*)calloc(c * n * size * size, sizeof(float));
+        l.bias_m = (float*)calloc(n, sizeof(float));
+        l.scale_m = (float*)calloc(n, sizeof(float));
+        l.bias_v = (float*)calloc(n, sizeof(float));
+        l.scale_v = (float*)calloc(n, sizeof(float));
     }
 
 #ifdef GPU
@@ -549,11 +551,11 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
     l->outputs = l->out_h * l->out_w * l->out_c;
     l->inputs = l->w * l->h * l->c;
 
-    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
-    l->delta  = realloc(l->delta,  l->batch*l->outputs*sizeof(float));
+    l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
+    l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
     if(l->batch_normalize){
-        l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
-        l->x_norm  = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
+        l->x = (float*)realloc(l->x, l->batch * l->outputs * sizeof(float));
+        l->x_norm = (float*)realloc(l->x_norm, l->batch * l->outputs * sizeof(float));
     }
 
     if (l->xnor) {
@@ -642,7 +644,7 @@ void gemm_nn_custom(int M, int N, int K, float ALPHA,
     int i, j, k;
     for (i = 0; i < M; ++i) {
         for (k = 0; k < K; ++k) {
-            register float A_PART = ALPHA*A[i*lda + k];
+            float A_PART = ALPHA * A[i * lda + k];
             //printf("\n weight = %f \n", A_PART);
             for (j = 0; j < N; ++j) {
                 C[i*ldc + j] += A_PART*B[k*ldb + j];
@@ -695,8 +697,8 @@ void binary_align_weights(convolutional_layer *l)
 
     size_t align_weights_size = new_lda * m;
     l->align_bit_weights_size = align_weights_size / 8 + 1;
-    float *align_weights = calloc(align_weights_size, sizeof(float));
-    l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char));
+    float* align_weights = (float*)calloc(align_weights_size, sizeof(float));
+    l->align_bit_weights = (char*)calloc(l->align_bit_weights_size, sizeof(char));
 
     size_t i, j;
     // align A without transpose
@@ -739,7 +741,7 @@ void binary_align_weights(convolutional_layer *l)
         //printf("\n l.index = %d \t aw[0] = %f, aw[1] = %f, aw[2] = %f, aw[3] = %f \n", l->index, align_weights[0], align_weights[1], align_weights[2], align_weights[3]);
         //memcpy(l->binary_weights, align_weights, (l->size * l->size * l->c * l->n) * sizeof(float));
 
-        float_to_bit(align_weights, l->align_bit_weights, align_weights_size);
+        float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size);
 
         //if (l->n >= 32)
         if(gpu_index >= 0)
@@ -757,7 +759,7 @@ void binary_align_weights(convolutional_layer *l)
         //get_mean_array(l->binary_weights, m*new_lda, l->n, l->mean_arr);
     }
     else {
-        float_to_bit(align_weights, l->align_bit_weights, align_weights_size);
+        float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size);
 
         get_mean_array(l->binary_weights, m*k, l->n, l->mean_arr);
     }
@@ -808,7 +810,7 @@ size_t binary_transpose_align_input(int k, int n, float *b, char **t_bit_input,
     // t_bit_input - [new_ldb, n] - [k', n]
 
     //transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8);
-    transpose_bin(b, *t_bit_input, k, n, bit_align, new_ldb, 8);
+    transpose_bin((uint32_t*)b, (uint32_t*)*t_bit_input, k, n, bit_align, new_ldb, 8);
 
     return t_intput_size;
 }
@@ -874,7 +876,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
                 repack_input(state.input, state.workspace, l.w, l.h, l.c);
 
                 // 32 x floats -> 1 x uint32_t
-                float_to_bit(state.workspace, (char *)l.bin_re_packed_input, l.c * l.w * l.h);
+                float_to_bit(state.workspace, (unsigned char *)l.bin_re_packed_input, l.c * l.w * l.h);
 
                 //free(re_packed_input);
 
@@ -900,10 +902,10 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
 
 // // then exit from if()
 
-                transpose_uint32((uint32_t *)state.workspace, l.t_bit_input, new_k, n, n, new_ldb);
+                transpose_uint32((uint32_t *)state.workspace, (uint32_t*)l.t_bit_input, new_k, n, n, new_ldb);
 
                 // the main GEMM function
-                gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, l.t_bit_input, new_ldb, c, n, l.mean_arr);
+                gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr);
 
                 // // alternative GEMM
                 //gemm_nn_bin_transposed_32bit_packed(m, n, new_k, 1,
@@ -945,7 +947,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
                     size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align);
 
                     // 5x times faster than gemm()-float32
-                    gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, l.t_bit_input, new_ldb, c, n, l.mean_arr);
+                    gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr);
 
                     //gemm_nn_custom_bin_mean_transposed(m, n, k, 1, bit_weights, k, t_bit_input, new_ldb, c, n, mean_arr);
 
@@ -1074,7 +1076,7 @@ void rescale_weights(convolutional_layer l, float scale, float trans)
 
 image *get_weights(convolutional_layer l)
 {
-    image *weights = calloc(l.n, sizeof(image));
+    image* weights = (image*)calloc(l.n, sizeof(image));
     int i;
     for(i = 0; i < l.n; ++i){
         weights[i] = copy_image(get_convolutional_weight(l, i));
@@ -1097,4 +1099,3 @@ image *visualize_convolutional_layer(convolutional_layer l, char *window, image
     free_image(dc);
     return single_weights;
 }
-
diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h
index d6ec9551..bad3b84e 100644
--- a/src/convolutional_layer.h
+++ b/src/convolutional_layer.h
@@ -9,6 +9,9 @@
 
 typedef layer convolutional_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 #ifdef GPU
 void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
 void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
@@ -22,11 +25,11 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
 #ifdef CUDNN
 void cudnn_convolutional_setup(layer *l, int cudnn_preference);
 void create_convolutional_cudnn_tensors(layer *l);
-size_t get_convolutional_workspace_size(layer l);
 void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
 #endif
 #endif
 
+size_t get_convolutional_workspace_size(layer l);
 convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index);
 void denormalize_convolutional_layer(convolutional_layer l);
 void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
@@ -53,5 +56,8 @@ int convolutional_out_width(convolutional_layer layer);
 void rescale_weights(convolutional_layer l, float scale, float trans);
 void rgbgr_weights(convolutional_layer l);
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/cost_layer.c b/src/cost_layer.c
index 39d2398b..39913d67 100644
--- a/src/cost_layer.c
+++ b/src/cost_layer.c
@@ -32,7 +32,7 @@ char *get_cost_string(COST_TYPE a)
 cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
 {
     fprintf(stderr, "cost                                           %4d\n",  inputs);
-    cost_layer l = {0};
+    cost_layer l = { (LAYER_TYPE)0 };
     l.type = COST;
 
     l.scale = scale;
@@ -40,9 +40,9 @@ cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float sca
     l.inputs = inputs;
     l.outputs = inputs;
     l.cost_type = cost_type;
-    l.delta = calloc(inputs*batch, sizeof(float));
-    l.output = calloc(inputs*batch, sizeof(float));
-    l.cost = calloc(1, sizeof(float));
+    l.delta = (float*)calloc(inputs * batch, sizeof(float));
+    l.output = (float*)calloc(inputs * batch, sizeof(float));
+    l.cost = (float*)calloc(1, sizeof(float));
 
     l.forward = forward_cost_layer;
     l.backward = backward_cost_layer;
@@ -60,8 +60,8 @@ void resize_cost_layer(cost_layer *l, int inputs)
 {
     l->inputs = inputs;
     l->outputs = inputs;
-    l->delta = realloc(l->delta, inputs*l->batch*sizeof(float));
-    l->output = realloc(l->output, inputs*l->batch*sizeof(float));
+    l->delta = (float*)realloc(l->delta, inputs * l->batch * sizeof(float));
+    l->output = (float*)realloc(l->output, inputs * l->batch * sizeof(float));
 #ifdef GPU
     cuda_free(l->delta_gpu);
     cuda_free(l->output_gpu);
diff --git a/src/cost_layer.h b/src/cost_layer.h
index a692831e..92666aa3 100644
--- a/src/cost_layer.h
+++ b/src/cost_layer.h
@@ -5,6 +5,9 @@
 
 typedef layer cost_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 COST_TYPE get_cost_type(char *s);
 char *get_cost_string(COST_TYPE a);
 cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
@@ -17,4 +20,7 @@ void forward_cost_layer_gpu(cost_layer l, network_state state);
 void backward_cost_layer_gpu(const cost_layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/cpu_gemm.c b/src/cpu_gemm.c
index 1ad6d333..6a3cd1dc 100644
--- a/src/cpu_gemm.c
+++ b/src/cpu_gemm.c
@@ -9,7 +9,7 @@ void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
     int i,j,k;
     for(i = 0; i < M; ++i){
         for(k = 0; k < K; ++k){
-            register float A_PART = ALPHA*A[i*lda+k];
+            float A_PART = ALPHA * A[i * lda + k];
             for(j = 0; j < N; ++j){
                 C[i*ldc+j] += A_PART*B[k*ldb+j];
             }
@@ -26,7 +26,7 @@ void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
     int i,j,k;
     for(i = 0; i < M; ++i){
         for(j = 0; j < N; ++j){
-            register float sum = 0;
+            float sum = 0;
             for(k = 0; k < K; ++k){
                 sum += ALPHA*A[i*lda+k]*B[k+j*ldb];
             }
@@ -44,7 +44,7 @@ void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
     int i,j,k;
     for(i = 0; i < M; ++i){
         for(k = 0; k < K; ++k){
-            register float A_PART = ALPHA*A[k*lda+i];
+            float A_PART = ALPHA * A[k * lda + i];
             for(j = 0; j < N; ++j){
                 C[i*ldc+j] += A_PART*B[k*ldb+j];
             }
diff --git a/src/crnn_layer.c b/src/crnn_layer.c
index 5a9d6f58..08daa6ef 100644
--- a/src/crnn_layer.c
+++ b/src/crnn_layer.c
@@ -30,7 +30,7 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
 {
     fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
     batch = batch / steps;
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.batch = batch;
     l.type = CRNN;
     l.steps = steps;
@@ -44,22 +44,19 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
     l.hidden = h * w * hidden_filters;
     l.outputs = l.out_h * l.out_w * l.out_c;
 
-    l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
+    l.state = (float*)calloc(l.hidden * batch * (steps + 1), sizeof(float));
 
-    l.input_layer = malloc(sizeof(layer));
-    fprintf(stderr, "");
+    l.input_layer = (layer*)malloc(sizeof(layer));
     *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
     l.input_layer->batch = batch;
     if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
 
-    l.self_layer = malloc(sizeof(layer));
-    fprintf(stderr, "");
+    l.self_layer = (layer*)malloc(sizeof(layer));
     *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
     l.self_layer->batch = batch;
     if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
 
-    l.output_layer = malloc(sizeof(layer));
-    fprintf(stderr, "");
+    l.output_layer = (layer*)malloc(sizeof(layer));
     *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
     l.output_layer->batch = batch;
     if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
diff --git a/src/crnn_layer.h b/src/crnn_layer.h
index 867d3ed6..cfbdd265 100644
--- a/src/crnn_layer.h
+++ b/src/crnn_layer.h
@@ -6,6 +6,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
 
 void forward_crnn_layer(layer l, network_state state);
@@ -20,5 +23,8 @@ void push_crnn_layer(layer l);
 void pull_crnn_layer(layer l);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/crop_layer.c b/src/crop_layer.c
index 11c59b49..816f00e1 100644
--- a/src/crop_layer.c
+++ b/src/crop_layer.c
@@ -16,7 +16,7 @@ void backward_crop_layer_gpu(const crop_layer l, network_state state){}
 crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
 {
     fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
-    crop_layer l = {0};
+    crop_layer l = { (LAYER_TYPE)0 };
     l.type = CROP;
     l.batch = batch;
     l.h = h;
@@ -32,7 +32,7 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int
     l.out_c = c;
     l.inputs = l.w * l.h * l.c;
     l.outputs = l.out_w * l.out_h * l.out_c;
-    l.output = calloc(l.outputs*batch, sizeof(float));
+    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
     l.forward = forward_crop_layer;
     l.backward = backward_crop_layer;
 
@@ -56,7 +56,7 @@ void resize_crop_layer(layer *l, int w, int h)
     l->inputs = l->w * l->h * l->c;
     l->outputs = l->out_h * l->out_w * l->out_c;
 
-    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
+    l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
     #ifdef GPU
     cuda_free(l->output_gpu);
     l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
diff --git a/src/crop_layer.h b/src/crop_layer.h
index 3aa2d3dd..31958244 100644
--- a/src/crop_layer.h
+++ b/src/crop_layer.h
@@ -7,6 +7,9 @@
 
 typedef layer crop_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 image get_crop_image(crop_layer l);
 crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
 void forward_crop_layer(const crop_layer l, network_state state);
@@ -16,5 +19,8 @@ void resize_crop_layer(layer *l, int w, int h);
 void forward_crop_layer_gpu(crop_layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu
index 8bbe0f54..724196c3 100644
--- a/src/crop_layer_kernels.cu
+++ b/src/crop_layer_kernels.cu
@@ -2,12 +2,10 @@
 #include "curand.h"
 #include "cublas_v2.h"
 
-extern "C" {
 #include "crop_layer.h"
 #include "utils.h"
 #include "cuda.h"
 #include "image.h"
-}
 
 __device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
 {
diff --git a/src/cuda.c b/src/cuda.c
index b790e7f5..87402e5a 100644
--- a/src/cuda.c
+++ b/src/cuda.c
@@ -1,4 +1,10 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
 int gpu_index = 0;
+#ifdef __cplusplus
+}
+#endif // __cplusplus
 
 #ifdef GPU
 
@@ -71,7 +77,7 @@ dim3 cuda_gridsize(size_t n){
         x = ceil(sqrt(k));
         y = (n-1)/(x*BLOCK) + 1;
     }
-    dim3 d = {x, y, 1};
+    dim3 d = { (unsigned int)x, (unsigned int)y, 1 };
     //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
     return d;
 }
@@ -188,7 +194,7 @@ cublasHandle_t blas_handle()
     if(!init[i]) {
         cublasCreate(&handle[i]);
         cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream());
-        CHECK_CUDA(status);
+        CHECK_CUDA((cudaError_t)status);
         init[i] = 1;
     }
     return handle[i];
@@ -226,7 +232,7 @@ void cuda_random(float *x_gpu, size_t n)
 
 float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
 {
-    float *tmp = calloc(n, sizeof(float));
+    float* tmp = (float*)calloc(n, sizeof(float));
     cuda_pull_array(x_gpu, tmp, n);
     //int i;
     //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
@@ -310,6 +316,6 @@ int get_gpu_compute_capability(int i)
 }
 
 #else // GPU
-#include "cuda.h"
+#include "darknet.h"
 void cuda_set_device(int n) {}
 #endif // GPU
diff --git a/src/cuda.h b/src/cuda.h
index a1e0e4eb..4010494e 100644
--- a/src/cuda.h
+++ b/src/cuda.h
@@ -1,25 +1,27 @@
-#ifndef CUDA_H
-#define CUDA_H
+#ifndef DARKCUDA_H
+#define DARKCUDA_H
 #include "darknet.h"
 
-#if defined(_MSC_VER) && _MSC_VER < 1900
-	#define inline __inline
+#ifdef __cplusplus
+extern "C" {
 #endif
 
 extern int gpu_index;
+#ifdef __cplusplus
+}
+#endif // __cplusplus
 
 #ifdef GPU
 
-#define BLOCK 512
 
-#include "cuda_runtime.h"
-#include "curand.h"
-#include "cublas_v2.h"
-#include "cuda_runtime_api.h"
-//#include "driver_types.h"
+#include <cuda_runtime.h>
+#include <curand.h>
+#include <cublas_v2.h>
+#include <cuda_runtime_api.h>
+#include <driver_types.h>
 
 #ifdef CUDNN
-#include "cudnn.h"
+#include <cudnn.h>
 #endif // CUDNN
 
 #ifndef __DATE__
@@ -65,9 +67,6 @@ extern "C" {
     cudaStream_t get_cuda_memcpy_stream();
     int get_number_of_blocks(int array_size, int block_size);
     int get_gpu_compute_capability(int i);
-#ifdef __cplusplus
-}
-#endif // __cplusplus
 
 #ifdef CUDNN
 cudnnHandle_t cudnn_handle();
@@ -77,6 +76,10 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line
 #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__,  __DATE__ " - " __TIME__ );
 #endif
 
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
 #else // GPU
 //LIB_API void cuda_set_device(int n);
 #endif // GPU
diff --git a/src/darknet.c b/src/darknet.c
index 074dc8b9..87175bc3 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -10,7 +10,7 @@
 #include "connected_layer.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
@@ -258,12 +258,12 @@ layer normalize_layer(layer l, int n)
 {
     int j;
     l.batch_normalize=1;
-    l.scales = calloc(n, sizeof(float));
+    l.scales = (float*)calloc(n, sizeof(float));
     for(j = 0; j < n; ++j){
         l.scales[j] = 1;
     }
-    l.rolling_mean = calloc(n, sizeof(float));
-    l.rolling_variance = calloc(n, sizeof(float));
+    l.rolling_mean = (float*)calloc(n, sizeof(float));
+    l.rolling_variance = (float*)calloc(n, sizeof(float));
     return l;
 }
 
diff --git a/src/unistd.h b/src/darkunistd.h
similarity index 88%
rename from src/unistd.h
rename to src/darkunistd.h
index 4c51f975..bb97da5f 100644
--- a/src/unistd.h
+++ b/src/darkunistd.h
@@ -1,3 +1,4 @@
+#ifdef _WIN32
 #ifndef _UNISTD_H
 #define _UNISTD_H    1
 
@@ -6,12 +7,13 @@
 *  Please add functionality as needed
 */
 
-#include <stdlib.h>
+#include <Winsock2.h>
+#include <direct.h> /* for _getcwd() and _chdir() */
+#include <getopt.h>
 #include <io.h>
 #include <process.h> /* for getpid() and the exec..() family */
-#include <direct.h> /* for _getcwd() and _chdir() */
+#include <stdlib.h>
 
-#include "getopt.h" /* getopt at: https://gist.github.com/ashelly/7776712 */
 #define srandom srand
 #define random rand
 
@@ -19,7 +21,7 @@
 These may be OR'd together.  */
 #define R_OK    4       /* Test for read permission.  */
 #define W_OK    2       /* Test for write permission.  */
-//#define   X_OK    1       /* execute permission - unsupported in windows*/
+#define X_OK R_OK /* execute permission - unsupported in Windows, use R_OK instead. */
 #define F_OK    0       /* Test for existence.  */
 
 #define access _access
@@ -48,5 +50,7 @@ These may be OR'd together.  */
 //typedef unsigned __int16  uint16_t;
 //typedef unsigned __int32  uint32_t;
 //typedef unsigned __int64  uint64_t;
-
-#endif /* unistd.h  */
\ No newline at end of file
+#endif /* _UNISTD_H  */
+#else
+#include <unistd.h>
+#endif /* _WIN32  */
diff --git a/src/data.c b/src/data.c
index d152fde4..09b1b2e2 100644
--- a/src/data.c
+++ b/src/data.c
@@ -41,7 +41,7 @@ char **get_random_paths_indexes(char **paths, int n, int m, int *indexes)
 
 char **get_random_paths(char **paths, int n, int m)
 {
-    char **random_paths = calloc(n, sizeof(char*));
+    char** random_paths = (char**)calloc(n, sizeof(char*));
     int i;
     pthread_mutex_lock(&mutex);
     //printf("n = %d \n", n);
@@ -60,7 +60,7 @@ char **get_random_paths(char **paths, int n, int m)
 
 char **find_replace_paths(char **paths, int n, char *find, char *replace)
 {
-    char **replace_paths = calloc(n, sizeof(char*));
+    char** replace_paths = (char**)calloc(n, sizeof(char*));
     int i;
     for(i = 0; i < n; ++i){
         char replaced[4096];
@@ -75,7 +75,7 @@ matrix load_image_paths_gray(char **paths, int n, int w, int h)
     int i;
     matrix X;
     X.rows = n;
-    X.vals = calloc(X.rows, sizeof(float*));
+    X.vals = (float**)calloc(X.rows, sizeof(float*));
     X.cols = 0;
 
     for(i = 0; i < n; ++i){
@@ -96,7 +96,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
     int i;
     matrix X;
     X.rows = n;
-    X.vals = calloc(X.rows, sizeof(float*));
+    X.vals = (float**)calloc(X.rows, sizeof(float*));
     X.cols = 0;
 
     for(i = 0; i < n; ++i){
@@ -112,7 +112,7 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int
     int i;
     matrix X;
     X.rows = n;
-    X.vals = calloc(X.rows, sizeof(float*));
+    X.vals = (float**)calloc(X.rows, sizeof(float*));
     X.cols = 0;
 
     for(i = 0; i < n; ++i){
@@ -139,7 +139,7 @@ extern int check_mistakes;
 
 box_label *read_boxes(char *filename, int *n)
 {
-    box_label *boxes = calloc(1, sizeof(box_label));
+    box_label* boxes = (box_label*)calloc(1, sizeof(box_label));
     FILE *file = fopen(filename, "r");
     if (!file) {
         printf("Can't open label file. (This can be normal only if you use MSCOCO): %s \n", filename);
@@ -158,7 +158,7 @@ box_label *read_boxes(char *filename, int *n)
     int id;
     int count = 0;
     while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
-        boxes = realloc(boxes, (count+1)*sizeof(box_label));
+        boxes = (box_label*)realloc(boxes, (count + 1) * sizeof(box_label));
         boxes[count].id = id;
         boxes[count].x = x;
         boxes[count].y = y;
@@ -300,7 +300,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
     free(boxes);
 }
 
-void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
+void fill_truth_detection(const char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
     int small_object, int net_w, int net_h)
 {
     char labelpath[4096];
@@ -391,7 +391,6 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
     free(boxes);
 }
 
-#define NUMCHARS 37
 
 void print_letters(float *pred, int n)
 {
@@ -565,7 +564,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
     d.shallow = 0;
 
     d.X.rows = n;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*3;
 
 
@@ -619,7 +618,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
     d.shallow = 0;
 
     d.X.rows = n;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*6;
 
     int k = 2*(classes);
@@ -628,7 +627,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
         image im1 = load_image_color(paths[i*2],   w, h);
         image im2 = load_image_color(paths[i*2+1], w, h);
 
-        d.X.vals[i] = calloc(d.X.cols, sizeof(float));
+        d.X.vals[i] = (float*)calloc(d.X.cols, sizeof(float));
         memcpy(d.X.vals[i],         im1.data, h*w*3*sizeof(float));
         memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float));
 
@@ -690,7 +689,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
     d.h = h;
 
     d.X.rows = 1;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*3;
 
     int k = (4+classes)*30;
@@ -729,12 +728,12 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
 }
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
-#include "opencv2/core/version.hpp"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/imgproc/imgproc_c.h>
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
-#include "opencv2/imgcodecs/imgcodecs_c.h"
+#include <opencv2/videoio/videoio_c.h>
+#include <opencv2/imgcodecs/imgcodecs_c.h>
 #endif
 
 #include "http_stream.h"
@@ -748,7 +747,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
     d.shallow = 0;
 
     d.X.rows = n;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*c;
 
     d.y = make_matrix(n, 5*boxes);
@@ -817,7 +816,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
     d.shallow = 0;
 
     d.X.rows = n;
-    d.X.vals = calloc(d.X.rows, sizeof(float*));
+    d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
     d.X.cols = h*w*c;
 
     d.y = make_matrix(n, 5 * boxes);
@@ -903,7 +902,7 @@ void *load_thread(void *ptr)
 pthread_t load_data_in_thread(load_args args)
 {
     pthread_t thread;
-    struct load_args *ptr = calloc(1, sizeof(struct load_args));
+    struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
     *ptr = args;
     if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed");
     return thread;
@@ -918,8 +917,8 @@ void *load_threads(void *ptr)
     data *out = args.d;
     int total = args.n;
     free(ptr);
-    data *buffers = calloc(args.threads, sizeof(data));
-    pthread_t *threads = calloc(args.threads, sizeof(pthread_t));
+    data* buffers = (data*)calloc(args.threads, sizeof(data));
+    pthread_t* threads = (pthread_t*)calloc(args.threads, sizeof(pthread_t));
     for(i = 0; i < args.threads; ++i){
         args.d = buffers + i;
         args.n = (i+1) * total/args.threads - i * total/args.threads;
@@ -942,7 +941,7 @@ void *load_threads(void *ptr)
 pthread_t load_data(load_args args)
 {
     pthread_t thread;
-    struct load_args *ptr = calloc(1, sizeof(struct load_args));
+    struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
     *ptr = args;
     if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed");
     return thread;
@@ -996,11 +995,11 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
 
     int i;
     d.X.rows = n;
-    d.X.vals = calloc(n, sizeof(float*));
+    d.X.vals = (float**)calloc(n, sizeof(float*));
     d.X.cols = w*h*3;
 
     d.y.rows = n;
-    d.y.vals = calloc(n, sizeof(float*));
+    d.y.vals = (float**)calloc(n, sizeof(float*));
     d.y.cols = w*scale * h*scale * 3;
 
     for(i = 0; i < n; ++i){
@@ -1048,7 +1047,7 @@ matrix concat_matrix(matrix m1, matrix m2)
     matrix m;
     m.cols = m1.cols;
     m.rows = m1.rows+m2.rows;
-    m.vals = calloc(m1.rows + m2.rows, sizeof(float*));
+    m.vals = (float**)calloc(m1.rows + m2.rows, sizeof(float*));
     for(i = 0; i < m1.rows; ++i){
         m.vals[count++] = m1.vals[i];
     }
@@ -1072,9 +1071,9 @@ data concat_datas(data *d, int n)
     int i;
     data out = {0};
     for(i = 0; i < n; ++i){
-        data new = concat_data(d[i], out);
+        data newdata = concat_data(d[i], out);
         free_data(out);
-        out = new;
+        out = newdata;
     }
     return out;
 }
@@ -1300,8 +1299,8 @@ data get_random_data(data d, int num)
     r.X.cols = d.X.cols;
     r.y.cols = d.y.cols;
 
-    r.X.vals = calloc(num, sizeof(float *));
-    r.y.vals = calloc(num, sizeof(float *));
+    r.X.vals = (float**)calloc(num, sizeof(float*));
+    r.y.vals = (float**)calloc(num, sizeof(float*));
 
     int i;
     for(i = 0; i < num; ++i){
@@ -1314,7 +1313,7 @@ data get_random_data(data d, int num)
 
 data *split_data(data d, int part, int total)
 {
-    data *split = calloc(2, sizeof(data));
+    data* split = (data*)calloc(2, sizeof(data));
     int i;
     int start = part*d.X.rows/total;
     int end = (part+1)*d.X.rows/total;
@@ -1327,10 +1326,10 @@ data *split_data(data d, int part, int total)
     train.X.cols = test.X.cols = d.X.cols;
     train.y.cols = test.y.cols = d.y.cols;
 
-    train.X.vals = calloc(train.X.rows, sizeof(float*));
-    test.X.vals = calloc(test.X.rows, sizeof(float*));
-    train.y.vals = calloc(train.y.rows, sizeof(float*));
-    test.y.vals = calloc(test.y.rows, sizeof(float*));
+    train.X.vals = (float**)calloc(train.X.rows, sizeof(float*));
+    test.X.vals = (float**)calloc(test.X.rows, sizeof(float*));
+    train.y.vals = (float**)calloc(train.y.rows, sizeof(float*));
+    test.y.vals = (float**)calloc(test.y.rows, sizeof(float*));
 
     for(i = 0; i < start; ++i){
         train.X.vals[i] = d.X.vals[i];
diff --git a/src/data.h b/src/data.h
index 844680dc..58cd1178 100644
--- a/src/data.h
+++ b/src/data.h
@@ -2,14 +2,14 @@
 #define DATA_H
 #include <pthread.h>
 
-#if defined(_MSC_VER) && _MSC_VER < 1900
-	#define inline __inline
-#endif
-
+#include "darknet.h"
 #include "darknet.h"
 #include "matrix.h"
 #include "list.h"
 #include "image.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
 #include "tree.h"
 
 static inline float distance_from_edge(int x, int max)
@@ -115,5 +115,8 @@ data *split_data(data d, int part, int total);
 data concat_data(data d1, data d2);
 data concat_datas(data *d, int n);
 void fill_truth(char *path, char **labels, int k, float *truth);
+#ifdef __cplusplus
+}
 
 #endif
+#endif
diff --git a/src/deconvolutional_kernels.cu b/src/deconvolutional_kernels.cu
index d6259fb3..88106ece 100644
--- a/src/deconvolutional_kernels.cu
+++ b/src/deconvolutional_kernels.cu
@@ -2,7 +2,6 @@
 #include "curand.h"
 #include "cublas_v2.h"
 
-extern "C" {
 #include "convolutional_layer.h"
 #include "deconvolutional_layer.h"
 #include "gemm.h"
@@ -11,7 +10,6 @@ extern "C" {
 #include "col2im.h"
 #include "utils.h"
 #include "cuda.h"
-}
 
 extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
 {
@@ -95,7 +93,7 @@ extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
     cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
 }
 
-extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
+extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay)
 {
     int size = layer.size*layer.size*layer.c*layer.n;
 
diff --git a/src/deconvolutional_layer.c b/src/deconvolutional_layer.c
index fbef9d58..a109999f 100644
--- a/src/deconvolutional_layer.c
+++ b/src/deconvolutional_layer.c
@@ -46,7 +46,7 @@ image get_deconvolutional_delta(deconvolutional_layer l)
 deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
 {
     int i;
-    deconvolutional_layer l = {0};
+    deconvolutional_layer l = { (LAYER_TYPE)0 };
     l.type = DECONVOLUTIONAL;
 
     l.h = h;
@@ -57,11 +57,11 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
     l.stride = stride;
     l.size = size;
 
-    l.weights = calloc(c*n*size*size, sizeof(float));
-    l.weight_updates = calloc(c*n*size*size, sizeof(float));
+    l.weights = (float*)calloc(c * n * size * size, sizeof(float));
+    l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
 
-    l.biases = calloc(n, sizeof(float));
-    l.bias_updates = calloc(n, sizeof(float));
+    l.biases = (float*)calloc(n, sizeof(float));
+    l.bias_updates = (float*)calloc(n, sizeof(float));
     float scale = 1./sqrt(size*size*c);
     for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
     for(i = 0; i < n; ++i){
@@ -76,9 +76,9 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
     l.outputs = l.out_w * l.out_h * l.out_c;
     l.inputs = l.w * l.h * l.c;
 
-    l.col_image = calloc(h*w*size*size*n, sizeof(float));
-    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
-    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));
+    l.col_image = (float*)calloc(h * w * size * size * n, sizeof(float));
+    l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
+    l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
 
     l.forward = forward_deconvolutional_layer;
     l.backward = backward_deconvolutional_layer;
@@ -110,11 +110,11 @@ void resize_deconvolutional_layer(deconvolutional_layer *l, int h, int w)
     int out_h = deconvolutional_out_height(*l);
     int out_w = deconvolutional_out_width(*l);
 
-    l->col_image = realloc(l->col_image,
+    l->col_image = (float*)realloc(l->col_image,
                                 out_h*out_w*l->size*l->size*l->c*sizeof(float));
-    l->output = realloc(l->output,
+    l->output = (float*)realloc(l->output,
                                 l->batch*out_h * out_w * l->n*sizeof(float));
-    l->delta  = realloc(l->delta,
+    l->delta = (float*)realloc(l->delta,
                                 l->batch*out_h * out_w * l->n*sizeof(float));
     #ifdef GPU
     cuda_free(l->col_image_gpu);
@@ -191,7 +191,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
     }
 }
 
-void update_deconvolutional_layer(deconvolutional_layer l, float learning_rate, float momentum, float decay)
+void update_deconvolutional_layer(deconvolutional_layer l, int skip, float learning_rate, float momentum, float decay)
 {
     int size = l.size*l.size*l.c*l.n;
     axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1);
diff --git a/src/deconvolutional_layer.h b/src/deconvolutional_layer.h
index 2d36e02a..2e1c4527 100644
--- a/src/deconvolutional_layer.h
+++ b/src/deconvolutional_layer.h
@@ -9,10 +9,13 @@
 
 typedef layer deconvolutional_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 #ifdef GPU
 void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
 void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
-void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
+void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay);
 void push_deconvolutional_layer(deconvolutional_layer layer);
 void pull_deconvolutional_layer(deconvolutional_layer layer);
 #endif
@@ -20,7 +23,7 @@ void pull_deconvolutional_layer(deconvolutional_layer layer);
 deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation);
 void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
 void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state);
-void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
+void update_deconvolutional_layer(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay);
 void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state);
 
 image get_deconvolutional_image(deconvolutional_layer layer);
@@ -30,5 +33,8 @@ image get_deconvolutional_filter(deconvolutional_layer layer, int i);
 int deconvolutional_out_height(deconvolutional_layer layer);
 int deconvolutional_out_width(deconvolutional_layer layer);
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/demo.c b/src/demo.c
index 9029ff1e..80f925ac 100644
--- a/src/demo.c
+++ b/src/demo.c
@@ -9,20 +9,18 @@
 #include "demo.h"
 #ifdef WIN32
 #include <time.h>
-#include <winsock.h>
 #include "gettimeofday.h"
 #else
 #include <sys/time.h>
 #endif
 
-#define FRAMES 3
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
-#include "opencv2/core/version.hpp"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/imgproc/imgproc_c.h>
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
 #include "http_stream.h"
 image get_image_from_stream(CvCapture *cap);
@@ -45,10 +43,10 @@ static int demo_ext_output = 0;
 static long long int frame_id = 0;
 static int demo_json_port = -1;
 
-static float *predictions[FRAMES];
+static float* predictions[NFRAMES];
 static int demo_index = 0;
-static image images[FRAMES];
-static IplImage* ipl_images[FRAMES];
+static image images[NFRAMES];
+static IplImage* ipl_images[NFRAMES];
 static float *avg;
 
 void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output);
@@ -77,7 +75,7 @@ void *fetch_in_thread(void *ptr)
         //error("Stream closed.");
         printf("Stream closed.\n");
         flag_exit = 1;
-        return EXIT_FAILURE;
+        return (void *)(size_t)EXIT_FAILURE;
     }
     //in_s = resize_image(in, net.w, net.h);
 
@@ -91,14 +89,14 @@ void *detect_in_thread(void *ptr)
     float *prediction = network_predict(net, X);
 
     memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
-    mean_arrays(predictions, FRAMES, l.outputs, avg);
+    mean_arrays(predictions, NFRAMES, l.outputs, avg);
     l.output = avg;
 
     free_image(det_s);
 
     ipl_images[demo_index] = det_img;
-    det_img = ipl_images[(demo_index + FRAMES / 2 + 1) % FRAMES];
-    demo_index = (demo_index + 1) % FRAMES;
+    det_img = ipl_images[(demo_index + NFRAMES / 2 + 1) % NFRAMES];
+    demo_index = (demo_index + 1) % NFRAMES;
 
     if (letter_box)
         dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box
@@ -110,11 +108,11 @@ void *detect_in_thread(void *ptr)
 
 double get_wall_time()
 {
-    struct timeval time;
-    if (gettimeofday(&time,NULL)){
+    struct timeval walltime;
+    if (gettimeofday(&walltime, NULL)) {
         return 0;
     }
-    return (double)time.tv_sec + (double)time.tv_usec * .000001;
+    return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001;
 }
 
 void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
@@ -161,8 +159,8 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
     int j;
 
     avg = (float *) calloc(l.outputs, sizeof(float));
-    for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
-    for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3);
+    for(j = 0; j < NFRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
+    for(j = 0; j < NFRAMES; ++j) images[j] = make_image(1,1,3);
 
     if (l.classes != demo_classes) {
         printf("Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes);
@@ -185,7 +183,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
     det_img = in_img;
     det_s = in_s;
 
-    for(j = 0; j < FRAMES/2; ++j){
+    for (j = 0; j < NFRAMES / 2; ++j) {
         fetch_in_thread(0);
         detect_in_thread(0);
         det_img = in_img;
@@ -318,10 +316,10 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
     free_image(in_s);
 
     free(avg);
-    for (j = 0; j < FRAMES; ++j) free(predictions[j]);
-    for (j = 0; j < FRAMES; ++j) free_image(images[j]);
+    for (j = 0; j < NFRAMES; ++j) free(predictions[j]);
+    for (j = 0; j < NFRAMES; ++j) free_image(images[j]);
 
-    free_ptrs(names, net.layers[net.n - 1].classes);
+    free_ptrs((void **)names, net.layers[net.n - 1].classes);
 
     int i;
     const int nsize = 8;
@@ -342,4 +340,3 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
     fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
 }
 #endif
-
diff --git a/src/demo.h b/src/demo.h
index 5db79346..b26b9592 100644
--- a/src/demo.h
+++ b/src/demo.h
@@ -1,8 +1,14 @@
-#ifndef DEMO
-#define DEMO
+#ifndef DEMO_H
+#define DEMO_H
 
 #include "image.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
 void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
     int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int json_port, int dont_show, int ext_output);
+#ifdef __cplusplus
+}
+#endif
 
 #endif
diff --git a/src/detection_layer.c b/src/detection_layer.c
index 0a1c1070..4cfe591d 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -12,7 +12,7 @@
 
 detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore)
 {
-    detection_layer l = {0};
+    detection_layer l = { (LAYER_TYPE)0 };
     l.type = DETECTION;
 
     l.n = n;
@@ -25,11 +25,11 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
     l.w = side;
     l.h = side;
     assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
-    l.cost = calloc(1, sizeof(float));
+    l.cost = (float*)calloc(1, sizeof(float));
     l.outputs = l.inputs;
     l.truths = l.side*l.side*(1+l.coords+l.classes);
-    l.output = calloc(batch*l.outputs, sizeof(float));
-    l.delta = calloc(batch*l.outputs, sizeof(float));
+    l.output = (float*)calloc(batch * l.outputs, sizeof(float));
+    l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
 
     l.forward = forward_detection_layer;
     l.backward = backward_detection_layer;
@@ -182,7 +182,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
         }
 
         if(0){
-            float *costs = calloc(l.batch*locations*l.n, sizeof(float));
+            float* costs = (float*)calloc(l.batch * locations * l.n, sizeof(float));
             for (b = 0; b < l.batch; ++b) {
                 int index = b*l.inputs;
                 for (i = 0; i < locations; ++i) {
@@ -259,11 +259,11 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state)
         return;
     }
 
-    float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
+    float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
     float *truth_cpu = 0;
     if(state.truth){
         int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes);
-        truth_cpu = calloc(num_truth, sizeof(float));
+        truth_cpu = (float*)calloc(num_truth, sizeof(float));
         cuda_pull_array(state.truth, truth_cpu, num_truth);
     }
     cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);
diff --git a/src/detection_layer.h b/src/detection_layer.h
index 9d2da928..f97bc39a 100644
--- a/src/detection_layer.h
+++ b/src/detection_layer.h
@@ -6,6 +6,9 @@
 
 typedef layer detection_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
 void forward_detection_layer(const detection_layer l, network_state state);
 void backward_detection_layer(const detection_layer l, network_state state);
@@ -17,4 +20,7 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state);
 void backward_detection_layer_gpu(detection_layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/detector.c b/src/detector.c
index 5524ac19..96921689 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -9,27 +9,33 @@
 #include "option_list.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/core/core_c.h"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/core/core_c.h>
 //#include "opencv2/core/core.hpp"
-#include "opencv2/core/version.hpp"
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/core/version.hpp>
+#include <opencv2/imgproc/imgproc_c.h>
 
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION)
+#ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib")
+#endif    // USE_CMAKE_LIBS
 #else
 #define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)"" CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)
+#ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib")
 #pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib")
 #pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib")
+#endif    // USE_CMAKE_LIBS
 #endif
 IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
 
 void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
     float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port);
+#endif // OPENCV
 
+#ifndef CV_RGB
 #define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
 #endif    // OPENCV
 
@@ -81,7 +87,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
     float avg_loss = -1;
-    network *nets = calloc(ngpus, sizeof(network));
+    network* nets = (network*)calloc(ngpus, sizeof(network));
 
     srand(time(0));
     int seed = rand();
@@ -410,8 +416,8 @@ void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int
         if (ymax > h) ymax = h;
 
         for (j = 0; j < classes; ++j) {
-            int class = j;
-            if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[class],
+            int myclass = j;
+            if (dets[i].prob[myclass]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[myclass],
                 xmin, ymin, xmax, ymax);
         }
     }
@@ -465,7 +471,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
     }
     else {
         if (!outfile) outfile = "comp4_det_test_";
-        fps = calloc(classes, sizeof(FILE *));
+        fps = (FILE**)calloc(classes, sizeof(FILE*));
         for (j = 0; j < classes; ++j) {
             snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
             fps[j] = fopen(buff, "w");
@@ -482,11 +488,11 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
 
     int nthreads = 4;
     if (m < 4) nthreads = m;
-    image *val = calloc(nthreads, sizeof(image));
-    image *val_resized = calloc(nthreads, sizeof(image));
-    image *buf = calloc(nthreads, sizeof(image));
-    image *buf_resized = calloc(nthreads, sizeof(image));
-    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+    image* val = (image*)calloc(nthreads, sizeof(image));
+    image* val_resized = (image*)calloc(nthreads, sizeof(image));
+    image* buf = (image*)calloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
+    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
 
     load_args args = { 0 };
     args.w = net.w;
@@ -702,11 +708,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
 
     int nthreads = 4;
     if (m < 4) nthreads = m;
-    image *val = calloc(nthreads, sizeof(image));
-    image *val_resized = calloc(nthreads, sizeof(image));
-    image *buf = calloc(nthreads, sizeof(image));
-    image *buf_resized = calloc(nthreads, sizeof(image));
-    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+    image* val = (image*)calloc(nthreads, sizeof(image));
+    image* val_resized = (image*)calloc(nthreads, sizeof(image));
+    image* buf = (image*)calloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
+    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
 
     load_args args = { 0 };
     args.w = net.w;
@@ -720,11 +726,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
     int tp_for_thresh = 0;
     int fp_for_thresh = 0;
 
-    box_prob *detections = calloc(1, sizeof(box_prob));
+    box_prob* detections = (box_prob*)calloc(1, sizeof(box_prob));
     int detections_count = 0;
     int unique_truth_count = 0;
 
-    int *truth_classes_count = calloc(classes, sizeof(int));
+    int* truth_classes_count = (int*)calloc(classes, sizeof(int));
 
     for (t = 0; t < nthreads; ++t) {
         args.path = paths[i + t];
@@ -798,7 +804,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
                     float prob = dets[i].prob[class_id];
                     if (prob > 0) {
                         detections_count++;
-                        detections = realloc(detections, detections_count * sizeof(box_prob));
+                        detections = (box_prob*)realloc(detections, detections_count * sizeof(box_prob));
                         detections[detections_count - 1].b = dets[i].bbox;
                         detections[detections_count - 1].p = prob;
                         detections[detections_count - 1].image_index = image_index;
@@ -890,14 +896,14 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
     } pr_t;
 
     // for PR-curve
-    pr_t **pr = calloc(classes, sizeof(pr_t*));
+    pr_t** pr = (pr_t**)calloc(classes, sizeof(pr_t*));
     for (i = 0; i < classes; ++i) {
-        pr[i] = calloc(detections_count, sizeof(pr_t));
+        pr[i] = (pr_t*)calloc(detections_count, sizeof(pr_t));
     }
     printf("\n detections_count = %d, unique_truth_count = %d  \n", detections_count, unique_truth_count);
 
 
-    int *truth_flags = calloc(unique_truth_count, sizeof(int));
+    int* truth_flags = (int*)calloc(unique_truth_count, sizeof(int));
 
     int rank;
     for (rank = 0; rank < detections_count; ++rank) {
@@ -993,7 +999,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
     if (reinforcement_fd != NULL) fclose(reinforcement_fd);
 
     // free memory
-    free_ptrs(names, net.layers[net.n - 1].classes);
+    free_ptrs((void**)names, net.layers[net.n - 1].classes);
     free_list_contents_kvp(options);
     free_list(options);
 
@@ -1043,7 +1049,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
     }
 
     //float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 };
-    float *rel_width_height_array = calloc(1000, sizeof(float));
+    float* rel_width_height_array = (float*)calloc(1000, sizeof(float));
 
 
     list *options = read_data_cfg(datacfg);
@@ -1079,7 +1085,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
                 if (check_mistakes) getchar();
             }
             number_of_boxes++;
-            rel_width_height_array = realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
+            rel_width_height_array = (float*)realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
             rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width;
             rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height;
             printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes);
@@ -1104,7 +1110,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
     // K-means
     anchors_data = do_kmeans(boxes_data, num_of_clusters);
 
-    qsort(anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), anchors_data_comparator);
+    qsort((void*)anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), (int (*)(const void*, const void*))anchors_data_comparator); // standard comparator type; __compar_fn_t is a glibc-only typedef
 
     //gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66
     //float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 };
@@ -1285,8 +1291,8 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
         layer l = net.layers[net.n - 1];
 
         //box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
-        //float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
-        //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
+        //float **probs = calloc(l.w*l.h*l.n, sizeof(float*));
+        //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
 
         float *X = sized.data;
 
@@ -1365,7 +1371,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
     }
 
     // free memory
-    free_ptrs(names, net.layers[net.n - 1].classes);
+    free_ptrs((void**)names, net.layers[net.n - 1].classes);
     free_list_contents_kvp(options);
     free_list(options);
 
@@ -1421,7 +1427,7 @@ void run_detector(int argc, char **argv)
         for (i = 0; i < len; ++i) {
             if (gpu_list[i] == ',') ++ngpus;
         }
-        gpus = calloc(ngpus, sizeof(int));
+        gpus = (int*)calloc(ngpus, sizeof(int));
         for (i = 0; i < ngpus; ++i) {
             gpus[i] = atoi(gpu_list);
             gpu_list = strchr(gpu_list, ',') + 1;
diff --git a/src/dice.c b/src/dice.c
index 22864598..348a4aed 100644
--- a/src/dice.c
+++ b/src/dice.c
@@ -9,7 +9,7 @@ void train_dice(char *cfgfile, char *weightfile)
     srand(time(0));
     float avg_loss = -1;
     char *base = basecfg(cfgfile);
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     printf("%s\n", base);
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
diff --git a/src/dropout_layer.c b/src/dropout_layer.c
index b1381e63..599acc8f 100644
--- a/src/dropout_layer.c
+++ b/src/dropout_layer.c
@@ -6,13 +6,13 @@
 
 dropout_layer make_dropout_layer(int batch, int inputs, float probability)
 {
-    dropout_layer l = {0};
+    dropout_layer l = { (LAYER_TYPE)0 };
     l.type = DROPOUT;
     l.probability = probability;
     l.inputs = inputs;
     l.outputs = inputs;
     l.batch = batch;
-    l.rand = calloc(inputs*batch, sizeof(float));
+    l.rand = (float*)calloc(inputs * batch, sizeof(float));
     l.scale = 1./(1.-probability);
     l.forward = forward_dropout_layer;
     l.backward = backward_dropout_layer;
@@ -27,7 +27,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
 
 void resize_dropout_layer(dropout_layer *l, int inputs)
 {
-    l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float));
+    l->rand = (float*)realloc(l->rand, l->inputs * l->batch * sizeof(float));
     #ifdef GPU
     cuda_free(l->rand_gpu);
 
diff --git a/src/dropout_layer.h b/src/dropout_layer.h
index 691cfc5b..25bea386 100644
--- a/src/dropout_layer.h
+++ b/src/dropout_layer.h
@@ -6,6 +6,9 @@
 
 typedef layer dropout_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 dropout_layer make_dropout_layer(int batch, int inputs, float probability);
 
 void forward_dropout_layer(dropout_layer l, network_state state);
@@ -16,5 +19,8 @@ void resize_dropout_layer(dropout_layer *l, int inputs);
 void forward_dropout_layer_gpu(dropout_layer l, network_state state);
 void backward_dropout_layer_gpu(dropout_layer l, network_state state);
 
+#endif
+#ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/src/dropout_layer_kernels.cu b/src/dropout_layer_kernels.cu
index 9b2d4f88..f6a93c91 100644
--- a/src/dropout_layer_kernels.cu
+++ b/src/dropout_layer_kernels.cu
@@ -2,11 +2,9 @@
 #include "curand.h"
 #include "cublas_v2.h"
 
-extern "C" {
 #include "dropout_layer.h"
 #include "cuda.h"
 #include "utils.h"
-}
 
 __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale)
 {
diff --git a/src/gemm.c b/src/gemm.c
index 022f1322..4b83c2f6 100644
--- a/src/gemm.c
+++ b/src/gemm.c
@@ -7,7 +7,10 @@
 #include <math.h>
 #include <float.h>
 #include <string.h>
-
+#include <stdint.h>
+#ifdef _WIN32
+#include <intrin.h>
+#endif
 #if defined(_OPENMP)
 #include <omp.h>
 #endif
@@ -37,7 +40,7 @@ void gemm_bin(int M, int N, int K, float ALPHA,
 float *random_matrix(int rows, int cols)
 {
     int i;
-    float *m = calloc(rows*cols, sizeof(float));
+    float* m = (float*)calloc(rows * cols, sizeof(float));
     for(i = 0; i < rows*cols; ++i){
         m[i] = (float)rand()/RAND_MAX;
     }
@@ -83,7 +86,6 @@ void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
 // XNOR bitwise GEMM for binary neural network
 //--------------------------------------------
 
-#include <stdint.h>
 
 static inline unsigned char xnor(unsigned char a, unsigned char b) {
     //return a == b;
@@ -318,6 +320,7 @@ void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb)
     }
 }
 
+#ifndef GPU
 uint8_t reverse_8_bit(uint8_t a) {
     return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16;
 }
@@ -465,6 +468,9 @@ void transpose_bin(char *A, char *B, const int n, const int m,
 }
 */
 
+#else
+extern void transpose_32x32_bits_reversed_diagonale(uint32_t* A, uint32_t* B, int m, int n);
+#endif
 
 // transpose by 32-bit
 void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
@@ -483,7 +489,7 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
             //transpose_32x32_bits_my(&A[a_index/32], &B[b_index/32], lda/32, ldb/32);
         }
         for (; j < m; ++j) {
-            if (get_bit(A, i*lda + j)) set_bit(B, j*ldb + i);
+            if (get_bit((const unsigned char* const)A, i * lda + j)) set_bit((unsigned char* const)B, j * ldb + i);
         }
     }
 }
@@ -703,7 +709,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
     else {
         for (i = 0; i < M; ++i) {
             for (k = 0; k < K; ++k) {
-                register float A_PART = ALPHA*A[i*lda + k];
+                float A_PART = ALPHA * A[i * lda + k];
                 for (j = 0; j < N; ++j) {
                     C[i*ldc + j] += A_PART*B[k*ldb + j];
                 }
@@ -730,9 +736,6 @@ void gemm_nn(int M, int N, int K, float ALPHA,
 }
 
 
-#define TILE_M 4    // 4 ops
-#define TILE_N 16   // AVX2 = 2 ops * 8 floats
-#define TILE_K 16   // loop
 
 void gemm_nn_fast(int M, int N, int K, float ALPHA,
     float *A, int lda,
@@ -1286,16 +1289,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
 }
 
 
-static inline float im2col_get_pixel(float *im, int height, int width, int channels,
-    int row, int col, int channel, int pad)
-{
-    row -= pad;
-    col -= pad;
 
-    if (row < 0 || col < 0 ||
-        row >= height || col >= width) return 0;
-    return im[col + width*(row + height*channel)];
-}
 
 //From Berkeley Vision's Caffe!
 //https://github.com/BVLC/caffe/blob/master/LICENSE
@@ -1645,7 +1639,7 @@ void im2col_cpu_custom_bin(float* data_im,
                     __m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS);
                     uint16_t mask = _mm256_movemask_ps(result256); // (val > 0) ? 0 : 1
 
-                    uint16_t *dst_ptr = &((unsigned char*)data_col)[col_index / 8];
+                    uint16_t* dst_ptr = (uint16_t*)&((unsigned char*)data_col)[col_index / 8]; // index in bytes, then view as uint16_t (not a uint16_t element index)
                     *dst_ptr |= (mask << (col_index % 8));
                 }
 
@@ -1657,7 +1651,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
                     float val = data_im[im_col + width*(im_row + height*c_im)];
-                    if(val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                 }
             }
 
@@ -1671,7 +1665,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                     float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                 }
             }
 
@@ -1685,7 +1679,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                     float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                 }
             }
 
@@ -1699,7 +1693,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                     float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                 }
             }
 
@@ -1713,7 +1707,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                     float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char* const)data_col, col_index);
                 }
             }
         }
@@ -1952,7 +1946,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
     int i, j, k;
     for (i = 0; i < M; ++i) {
         for (k = 0; k < K; ++k) {
-            register float A_PART = ALPHA*A[i*lda + k];
+            float A_PART = ALPHA * A[i * lda + k];
             for (j = 0; j < N; ++j) {
                 C[i*ldc + j] += A_PART*B[k*ldb + j];
             }
@@ -2239,7 +2233,7 @@ void im2col_cpu_custom_bin(float* data_im,
                     int col_index = c * new_ldb + h * width_col + w;
 
                     float val = data_im[im_col + width*(im_row + height*c_im)];
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                 }
 
                 for (; w < width_col - pad; ++w) {
@@ -2250,7 +2244,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
                     float val = data_im[im_col + width*(im_row + height*c_im)];
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                 }
             }
 
@@ -2264,7 +2258,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                     float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                 }
             }
 
@@ -2278,7 +2272,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                     float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                 }
             }
 
@@ -2292,7 +2286,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                     float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                 }
             }
 
@@ -2306,7 +2300,7 @@ void im2col_cpu_custom_bin(float* data_im,
 
                     //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
                     float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
-                    if (val > 0) set_bit(data_col, col_index);
+                    if (val > 0) set_bit((unsigned char*)data_col, col_index);
                 }
             }
         }
@@ -2346,7 +2340,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size)
     memset(dst, 0, dst_size);
 
     size_t i;
-    char *byte_arr = calloc(size, sizeof(char));
+    char* byte_arr = (char*)calloc(size, sizeof(char));
     for (i = 0; i < size; ++i) {
         if (src[i] > 0) byte_arr[i] = 1;
     }
@@ -2578,7 +2572,7 @@ void gemm_nt(int M, int N, int K, float ALPHA,
     int i,j,k;
     for(i = 0; i < M; ++i){
         for(j = 0; j < N; ++j){
-            register float sum = 0;
+            float sum = 0;
             for(k = 0; k < K; ++k){
                 sum += ALPHA*A[i*lda+k]*B[j*ldb + k];
             }
@@ -2595,7 +2589,7 @@ void gemm_tn(int M, int N, int K, float ALPHA,
     int i,j,k;
     for(i = 0; i < M; ++i){
         for(k = 0; k < K; ++k){
-            register float A_PART = ALPHA*A[k*lda+i];
+            float A_PART = ALPHA * A[k * lda + i];
             for(j = 0; j < N; ++j){
                 C[i*ldc+j] += A_PART*B[k*ldb+j];
             }
@@ -2611,7 +2605,7 @@ void gemm_tt(int M, int N, int K, float ALPHA,
     int i,j,k;
     for(i = 0; i < M; ++i){
         for(j = 0; j < N; ++j){
-            register float sum = 0;
+            float sum = 0;
             for(k = 0; k < K; ++k){
                 sum += ALPHA*A[i+k*lda]*B[k+j*ldb];
             }
@@ -2668,9 +2662,9 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
         float *C_gpu, int ldc)
 {
     cublasHandle_t handle = blas_handle();
-    cudaError_t stream_status = cublasSetStream(handle, get_cuda_stream());
+    cudaError_t stream_status = (cudaError_t)cublasSetStream(handle, get_cuda_stream());
     CHECK_CUDA(stream_status);
-    cudaError_t status = cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N),
+    cudaError_t status = (cudaError_t)cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N),
             (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
     CHECK_CUDA(status);
 }
diff --git a/src/gemm.h b/src/gemm.h
index c34b4b35..e93ee0e4 100644
--- a/src/gemm.h
+++ b/src/gemm.h
@@ -3,6 +3,9 @@
 #include "activations.h"
 #include <stdint.h>
 #include <stddef.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
     float *weights, float *input, float *output, float *mean);
@@ -56,6 +59,7 @@ void im2col_cpu_custom_transpose(float* data_im,
 
 void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a);
 
+LIB_API void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n);
 
 void gemm_bin(int M, int N, int K, float ALPHA,
         char  *A, int lda,
@@ -109,4 +113,7 @@ void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
         float BETA,
         float *C, int ldc);
 #endif
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/getopt.c b/src/getopt.c
index cfa30233..67a764d8 100644
--- a/src/getopt.c
+++ b/src/getopt.c
@@ -1,1258 +1,498 @@
-/* Getopt for GNU.
-   NOTE: getopt is now part of the C library, so if you don't know what
-   "Keep this file name-space clean" means, talk to drepper@gnu.org
-   before changing it!
-   Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001
-        Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
+#ifdef _WIN32
+#include <getopt.h>
 
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
-   Ditto for AIX 3.2 and <stdlib.h>.  */
-#ifndef _NO_PROTO
-# define _NO_PROTO
+#ifdef __cplusplus
+extern "C" {
 #endif
 
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#if !defined __STDC__ || !__STDC__
-/* This is a separate conditional since some stdc systems
-   reject `defined (const)'.  */
-# ifndef const
-#  define const
-# endif
-#endif
-
-#include <stdio.h>
-
-/* Comment out all this code if we are using the GNU C Library, and are not
-   actually compiling the library itself.  This code is part of the GNU C
-   Library, but also included in many other GNU distributions.  Compiling
-   and linking in this code is a waste when using the GNU C library
-   (especially if it is a shared library).  Rather than having every GNU
-   program understand `configure --with-gnu-libc' and omit the object files,
-   it is simpler to just do this in the source for each such file.  */
-
-#define GETOPT_INTERFACE_VERSION 2
-#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
-# include <gnu-versions.h>
-# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
-#  define ELIDE_CODE
-# endif
-#endif
-
-#ifndef ELIDE_CODE
-
-
-/* This needs to come after some library #include
-   to get __GNU_LIBRARY__ defined.  */
-#ifdef  __GNU_LIBRARY__
-/* Don't include stdlib.h for non-GNU C libraries because some of them
-   contain conflicting prototypes for getopt.  */
-# include <stdlib.h>
-# include <unistd.h>
-#endif  /* GNU C library.  */
-
-#ifdef VMS
-# include <unixlib.h>
-# if HAVE_STRING_H - 0
-#  include <string.h>
-# endif
-#endif
-
-#ifndef _
-/* This is for other GNU distributions with internationalized messages.  */
-# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
-#  include <libintl.h>
-#  ifndef _
-#   define _(msgid)     gettext (msgid)
-#  endif
-# else
-#  define _(msgid)      (msgid)
-# endif
-# if defined _LIBC && defined USE_IN_LIBIO
-#  include <wchar.h>
-# endif
-#endif
-
-/* This version of `getopt' appears to the caller like standard Unix `getopt'
-   but it behaves differently for the user, since it allows the user
-   to intersperse the options with the other arguments.
-
-   As `getopt' works, it permutes the elements of ARGV so that,
-   when it is done, all the options precede everything else.  Thus
-   all application programs are extended to handle flexible argument order.
-
-   Setting the environment variable POSIXLY_CORRECT disables permutation.
-   Then the behavior is completely standard.
-
-   GNU application programs can use a third alternative mode in which
-   they can distinguish the relative order of options and other arguments.  */
-
-#include "getopt.h"
-
-/* For communication from `getopt' to the caller.
-   When `getopt' finds an option that takes an argument,
-   the argument value is returned here.
-   Also, when `ordering' is RETURN_IN_ORDER,
-   each non-option ARGV-element is returned here.  */
-
-char *optarg;
-
-/* Index in ARGV of the next element to be scanned.
-   This is used for communication to and from the caller
-   and for communication between successive calls to `getopt'.
-
-   On entry to `getopt', zero means this is the first call; initialize.
-
-   When `getopt' returns -1, this is the index of the first of the
-   non-option elements that the caller should itself scan.
-
-   Otherwise, `optind' communicates from one call to the next
-   how much of ARGV has been scanned so far.  */
-
-/* 1003.2 says this must be 1 before any call.  */
-int optind = 1;
-
-/* Formerly, initialization of getopt depended on optind==0, which
-   causes problems with re-calling getopt as programs generally don't
-   know that. */
-
-int __getopt_initialized;
-
-/* The next char to be scanned in the option-element
-   in which the last option character we returned was found.
-   This allows us to pick up the scan where we left off.
-
-   If this is zero, or a null string, it means resume the scan
-   by advancing to the next ARGV-element.  */
-
-static char *nextchar;
-
-/* Callers store zero here to inhibit the error message
-   for unrecognized options.  */
-
-int opterr = 1;
-
-/* Set to an option character which was unrecognized.
-   This must be initialized on some systems to avoid linking in the
-   system's own getopt implementation.  */
-
-int optopt = '?';
-
-/* Describe how to deal with options that follow non-option ARGV-elements.
-
-   If the caller did not specify anything,
-   the default is REQUIRE_ORDER if the environment variable
-   POSIXLY_CORRECT is defined, PERMUTE otherwise.
-
-   REQUIRE_ORDER means don't recognize them as options;
-   stop option processing when the first non-option is seen.
-   This is what Unix does.
-   This mode of operation is selected by either setting the environment
-   variable POSIXLY_CORRECT, or using `+' as the first character
-   of the list of option characters.
-
-   PERMUTE is the default.  We permute the contents of ARGV as we scan,
-   so that eventually all the non-options are at the end.  This allows options
-   to be given in any order, even with programs that were not written to
-   expect this.
-
-   RETURN_IN_ORDER is an option available to programs that were written
-   to expect options and other ARGV-elements in any order and that care about
-   the ordering of the two.  We describe each non-option ARGV-element
-   as if it were the argument of an option with character code 1.
-   Using `-' as the first character of the list of option characters
-   selects this mode of operation.
-
-   The special argument `--' forces an end of option-scanning regardless
-   of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
-   `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
-
-static enum
-{
-  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
-} ordering;
-
-/* Value of POSIXLY_CORRECT environment variable.  */
-static char *posixly_correct;
-
-#ifdef  __GNU_LIBRARY__
-/* We want to avoid inclusion of string.h with non-GNU libraries
-   because there are many ways it can cause trouble.
-   On some systems, it contains special magic macros that don't work
-   in GCC.  */
-# include <string.h>
-# define my_index       strchr
-#else
-
-# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */
-#  include <string.h>
-# else
-#  include <strings.h>
-# endif
-
-/* Avoid depending on library functions or files
-   whose names are inconsistent.  */
-
-#ifndef getenv
-extern char *getenv ();
-#endif
-
-static char *
-my_index (str, chr)
-     const char *str;
-     int chr;
-{
-  while (*str)
-    {
-      if (*str == chr)
-        return (char *) str;
-      str++;
-    }
-  return 0;
-}
-
-/* If using GCC, we can safely declare strlen this way.
-   If not using GCC, it is ok not to declare it.  */
-#ifdef __GNUC__
-/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
-   That was relevant to code that was here before.  */
-# if (!defined __STDC__ || !__STDC__) && !defined strlen
-/* gcc with -traditional declares the built-in strlen to return int,
-   and has done so at least since version 2.4.5. -- rms.  */
-extern int strlen (const char *);
-# endif /* not __STDC__ */
-#endif /* __GNUC__ */
-
-#endif /* not __GNU_LIBRARY__ */
-
-/* Handle permutation of arguments.  */
-
-/* Describe the part of ARGV that contains non-options that have
-   been skipped.  `first_nonopt' is the index in ARGV of the first of them;
-   `last_nonopt' is the index after the last of them.  */
-
-static int first_nonopt;
-static int last_nonopt;
-
-#ifdef _LIBC
-/* Stored original parameters.
-   XXX This is no good solution.  We should rather copy the args so
-   that we can compare them later.  But we must not use malloc(3).  */
-extern int __libc_argc;
-extern char **__libc_argv;
-
-/* Bash 2.0 gives us an environment variable containing flags
-   indicating ARGV elements that should not be considered arguments.  */
-
-# ifdef USE_NONOPTION_FLAGS
-/* Defined in getopt_init.c  */
-extern char *__getopt_nonoption_flags;
-
-static int nonoption_flags_max_len;
-static int nonoption_flags_len;
-# endif
-
-# ifdef USE_NONOPTION_FLAGS
-#  define SWAP_FLAGS(ch1, ch2) \
-  if (nonoption_flags_len > 0)                                                \
-    {                                                                         \
-      char __tmp = __getopt_nonoption_flags[ch1];                             \
-      __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2];          \
-      __getopt_nonoption_flags[ch2] = __tmp;                                  \
-    }
-# else
-#  define SWAP_FLAGS(ch1, ch2)
-# endif
-#else   /* !_LIBC */
-# define SWAP_FLAGS(ch1, ch2)
-#endif  /* _LIBC */
-
-/* Exchange two adjacent subsequences of ARGV.
-   One subsequence is elements [first_nonopt,last_nonopt)
-   which contains all the non-options that have been skipped so far.
-   The other is elements [last_nonopt,optind), which contains all
-   the options processed since those non-options were skipped.
-
-   `first_nonopt' and `last_nonopt' are relocated so that they describe
-   the new indices of the non-options in ARGV after they are moved.  */
-
-#if defined __STDC__ && __STDC__
-static void exchange (char **);
+#ifdef REPLACE_GETOPT
+int opterr = 1; /* if error message should be printed */
+int optind = 1; /* index into parent argv vector */
+int optopt = '?'; /* character checked for validity */
+#undef optreset /* see getopt.h */
+#define optreset __mingw_optreset
+int optreset; /* reset getopt */
+char* optarg; /* argument associated with option */
 #endif
 
 static void
-exchange (argv)
-     char **argv;
+_vwarnx(const char* fmt, va_list ap)
 {
-  int bottom = first_nonopt;
-  int middle = last_nonopt;
-  int top = optind;
-  char *tem;
-
-  /* Exchange the shorter segment with the far end of the longer segment.
-     That puts the shorter segment into the right place.
-     It leaves the longer segment in the right place overall,
-     but it consists of two parts that need to be swapped next.  */
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
-  /* First make sure the handling of the `__getopt_nonoption_flags'
-     string can work normally.  Our top argument must be in the range
-     of the string.  */
-  if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len)
-    {
-      /* We must extend the array.  The user plays games with us and
-         presents new arguments.  */
-      char *new_str = malloc (top + 1);
-      if (new_str == NULL)
-        nonoption_flags_len = nonoption_flags_max_len = 0;
-      else
-        {
-          memset (__mempcpy (new_str, __getopt_nonoption_flags,
-                             nonoption_flags_max_len),
-                  '\0', top + 1 - nonoption_flags_max_len);
-          nonoption_flags_max_len = top + 1;
-          __getopt_nonoption_flags = new_str;
-        }
-    }
-#endif
-
-  while (top > middle && middle > bottom)
-    {
-      if (top - middle > middle - bottom)
-        {
-          /* Bottom segment is the short one.  */
-          int len = middle - bottom;
-          register int i;
-
-          /* Swap it with the top part of the top segment.  */
-          for (i = 0; i < len; i++)
-            {
-              tem = argv[bottom + i];
-              argv[bottom + i] = argv[top - (middle - bottom) + i];
-              argv[top - (middle - bottom) + i] = tem;
-              SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
-            }
-          /* Exclude the moved bottom segment from further swapping.  */
-          top -= len;
-        }
-      else
-        {
-          /* Top segment is the short one.  */
-          int len = top - middle;
-          register int i;
-
-          /* Swap it with the bottom part of the bottom segment.  */
-          for (i = 0; i < len; i++)
-            {
-              tem = argv[bottom + i];
-              argv[bottom + i] = argv[middle + i];
-              argv[middle + i] = tem;
-              SWAP_FLAGS (bottom + i, middle + i);
-            }
-          /* Exclude the moved top segment from further swapping.  */
-          bottom += len;
-        }
-    }
-
-  /* Update records for the slots the non-options now occupy.  */
-
-  first_nonopt += (optind - last_nonopt);
-  last_nonopt = optind;
+  (void)fprintf(stderr, "%s: ", __progname);
+  if (fmt != NULL)
+    (void)vfprintf(stderr, fmt, ap);
+  (void)fprintf(stderr, "\n");
 }
 
-/* Initialize the internal data when the first call is made.  */
-
-#if defined __STDC__ && __STDC__
-static const char *_getopt_initialize (int, char *const *, const char *);
-#endif
-static const char *
-_getopt_initialize (argc, argv, optstring)
-     int argc;
-     char *const *argv;
-     const char *optstring;
+static void
+warnx(const char* fmt, ...)
 {
-  /* Start processing options with ARGV-element 1 (since ARGV-element 0
-     is the program name); the sequence of previously skipped
-     non-option ARGV-elements is empty.  */
-
-  first_nonopt = last_nonopt = optind;
-
-  nextchar = NULL;
-
-  posixly_correct = getenv ("POSIXLY_CORRECT");
-
-  /* Determine how to handle the ordering of options and nonoptions.  */
-
-  if (optstring[0] == '-')
-    {
-      ordering = RETURN_IN_ORDER;
-      ++optstring;
-    }
-  else if (optstring[0] == '+')
-    {
-      ordering = REQUIRE_ORDER;
-      ++optstring;
-    }
-  else if (posixly_correct != NULL)
-    ordering = REQUIRE_ORDER;
-  else
-    ordering = PERMUTE;
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
-  if (posixly_correct == NULL
-      && argc == __libc_argc && argv == __libc_argv)
-    {
-      if (nonoption_flags_max_len == 0)
-        {
-          if (__getopt_nonoption_flags == NULL
-              || __getopt_nonoption_flags[0] == '\0')
-            nonoption_flags_max_len = -1;
-          else
-            {
-              const char *orig_str = __getopt_nonoption_flags;
-              int len = nonoption_flags_max_len = strlen (orig_str);
-              if (nonoption_flags_max_len < argc)
-                nonoption_flags_max_len = argc;
-              __getopt_nonoption_flags =
-                (char *) malloc (nonoption_flags_max_len);
-              if (__getopt_nonoption_flags == NULL)
-                nonoption_flags_max_len = -1;
-              else
-                memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
-                        '\0', nonoption_flags_max_len - len);
-            }
-        }
-      nonoption_flags_len = nonoption_flags_max_len;
-    }
-  else
-    nonoption_flags_len = 0;
-#endif
-
-  return optstring;
+  va_list ap;
+  va_start(ap, fmt);
+  _vwarnx(fmt, ap);
+  va_end(ap);
 }
 
-/* Scan elements of ARGV (whose length is ARGC) for option characters
-   given in OPTSTRING.
-
-   If an element of ARGV starts with '-', and is not exactly "-" or "--",
-   then it is an option element.  The characters of this element
-   (aside from the initial '-') are option characters.  If `getopt'
-   is called repeatedly, it returns successively each of the option characters
-   from each of the option elements.
-
-   If `getopt' finds another option character, it returns that character,
-   updating `optind' and `nextchar' so that the next call to `getopt' can
-   resume the scan with the following option character or ARGV-element.
-
-   If there are no more option characters, `getopt' returns -1.
-   Then `optind' is the index in ARGV of the first ARGV-element
-   that is not an option.  (The ARGV-elements have been permuted
-   so that those that are not options now come last.)
-
-   OPTSTRING is a string containing the legitimate option characters.
-   If an option character is seen that is not listed in OPTSTRING,
-   return '?' after printing an error message.  If you set `opterr' to
-   zero, the error message is suppressed but we still return '?'.
-
-   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
-   so the following text in the same ARGV-element, or the text of the following
-   ARGV-element, is returned in `optarg'.  Two colons mean an option that
-   wants an optional arg; if there is text in the current ARGV-element,
-   it is returned in `optarg', otherwise `optarg' is set to zero.
-
-   If OPTSTRING starts with `-' or `+', it requests different methods of
-   handling the non-option ARGV-elements.
-   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
-
-   Long-named options begin with `--' instead of `-'.
-   Their names may be abbreviated as long as the abbreviation is unique
-   or is an exact match for some defined option.  If they have an
-   argument, it follows the option name in the same ARGV-element, separated
-   from the option name by a `=', or else the in next ARGV-element.
-   When `getopt' finds a long-named option, it returns 0 if that option's
-   `flag' field is nonzero, the value of the option's `val' field
-   if the `flag' field is zero.
-
-   The elements of ARGV aren't really const, because we permute them.
-   But we pretend they're const in the prototype to be compatible
-   with other systems.
-
-   LONGOPTS is a vector of `struct option' terminated by an
-   element containing a name which is zero.
-
-   LONGIND returns the index in LONGOPT of the long-named option found.
-   It is only valid when a long-named option has been found by the most
-   recent call.
-
-   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
-   long-named options.  */
-
-int
-_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
-     int argc;
-     char *const *argv;
-     const char *optstring;
-     const struct option *longopts;
-     int *longind;
-     int long_only;
+/*
+ * Compute the greatest common divisor of a and b.
+ */
+static int
+gcd(int a, int b)
 {
-  int print_errors = opterr;
-  if (optstring[0] == ':')
-    print_errors = 0;
+  int c;
 
-  if (argc < 1)
-    return -1;
+  c = a % b;
+  while (c != 0) {
+    a = b;
+    b = c;
+    c = a % b;
+  }
 
-  optarg = NULL;
+  return (b);
+}
 
-  if (optind == 0 || !__getopt_initialized)
-    {
-      if (optind == 0)
-        optind = 1;     /* Don't scan ARGV[0], the program name.  */
-      optstring = _getopt_initialize (argc, argv, optstring);
-      __getopt_initialized = 1;
+/*
+ * Exchange the block from panonopt_start to panonopt_end with the block
+ * from panonopt_end to opt_end (keeping the same order of arguments
+ * in each block).
+ */
+static void
+permute_args(int panonopt_start, int panonopt_end, int opt_end,
+    char* const* nargv)
+{
+  int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
+  char* swap;
+
+  /*
+	 * compute lengths of blocks and number and size of cycles
+	 */
+  nnonopts = panonopt_end - panonopt_start;
+  nopts = opt_end - panonopt_end;
+  ncycle = gcd(nnonopts, nopts);
+  cyclelen = (opt_end - panonopt_start) / ncycle;
+
+  for (i = 0; i < ncycle; i++) {
+    cstart = panonopt_end + i;
+    pos = cstart;
+    for (j = 0; j < cyclelen; j++) {
+      if (pos >= panonopt_end)
+        pos -= nnonopts;
+      else
+        pos += nopts;
+      swap = nargv[pos];
+      /* LINTED const cast */
+      ((char**)nargv)[pos] = nargv[cstart];
+      /* LINTED const cast */
+      ((char**)nargv)[cstart] = swap;
     }
-
-  /* Test whether ARGV[optind] points to a non-option argument.
-     Either it does not have option syntax, or there is an environment flag
-     from the shell indicating it is not an option.  The later information
-     is only used when the used in the GNU libc.  */
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
-# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0'       \
-                      || (optind < nonoption_flags_len                        \
-                          && __getopt_nonoption_flags[optind] == '1'))
-#else
-# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
-#endif
-
-  if (nextchar == NULL || *nextchar == '\0')
-    {
-      /* Advance to the next ARGV-element.  */
-
-      /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been
-         moved back by the user (who may also have changed the arguments).  */
-      if (last_nonopt > optind)
-        last_nonopt = optind;
-      if (first_nonopt > optind)
-        first_nonopt = optind;
-
-      if (ordering == PERMUTE)
-        {
-          /* If we have just processed some options following some non-options,
-             exchange them so that the options come first.  */
-
-          if (first_nonopt != last_nonopt && last_nonopt != optind)
-            exchange ((char **) argv);
-          else if (last_nonopt != optind)
-            first_nonopt = optind;
-
-          /* Skip any additional non-options
-             and extend the range of non-options previously skipped.  */
-
-          while (optind < argc && NONOPTION_P)
-            optind++;
-          last_nonopt = optind;
-        }
-
-      /* The special ARGV-element `--' means premature end of options.
-         Skip it like a null option,
-         then exchange with previous non-options as if it were an option,
-         then skip everything else like a non-option.  */
-
-      if (optind != argc && !strcmp (argv[optind], "--"))
-        {
-          optind++;
-
-          if (first_nonopt != last_nonopt && last_nonopt != optind)
-            exchange ((char **) argv);
-          else if (first_nonopt == last_nonopt)
-            first_nonopt = optind;
-          last_nonopt = argc;
-
-          optind = argc;
-        }
-
-      /* If we have done all the ARGV-elements, stop the scan
-         and back over any non-options that we skipped and permuted.  */
-
-      if (optind == argc)
-        {
-          /* Set the next-arg-index to point at the non-options
-             that we previously skipped, so the caller will digest them.  */
-          if (first_nonopt != last_nonopt)
-            optind = first_nonopt;
-          return -1;
-        }
-
-      /* If we have come to a non-option and did not permute it,
-         either stop the scan or describe it to the caller and pass it by.  */
-
-      if (NONOPTION_P)
-        {
-          if (ordering == REQUIRE_ORDER)
-            return -1;
-          optarg = argv[optind++];
-          return 1;
-        }
-
-      /* We have found another option-ARGV-element.
-         Skip the initial punctuation.  */
-
-      nextchar = (argv[optind] + 1
-                  + (longopts != NULL && argv[optind][1] == '-'));
-    }
-
-  /* Decode the current option-ARGV-element.  */
-
-  /* Check whether the ARGV-element is a long option.
-
-     If long_only and the ARGV-element has the form "-f", where f is
-     a valid short option, don't consider it an abbreviated form of
-     a long option that starts with f.  Otherwise there would be no
-     way to give the -f short option.
-
-     On the other hand, if there's a long option "fubar" and
-     the ARGV-element is "-fu", do consider that an abbreviation of
-     the long option, just like "--fu", and not "-f" with arg "u".
-
-     This distinction seems to be the most useful approach.  */
-
-  if (longopts != NULL
-      && (argv[optind][1] == '-'
-          || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
-    {
-      char *nameend;
-      const struct option *p;
-      const struct option *pfound = NULL;
-      int exact = 0;
-      int ambig = 0;
-      int indfound = -1;
-      int option_index;
-
-      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
-        /* Do nothing.  */ ;
-
-      /* Test all long options for either exact match
-         or abbreviated matches.  */
-      for (p = longopts, option_index = 0; p->name; p++, option_index++)
-        if (!strncmp (p->name, nextchar, nameend - nextchar))
-          {
-            if ((unsigned int) (nameend - nextchar)
-                == (unsigned int) strlen (p->name))
-              {
-                /* Exact match found.  */
-                pfound = p;
-                indfound = option_index;
-                exact = 1;
-                break;
-              }
-            else if (pfound == NULL)
-              {
-                /* First nonexact match found.  */
-                pfound = p;
-                indfound = option_index;
-              }
-            else if (long_only
-                     || pfound->has_arg != p->has_arg
-                     || pfound->flag != p->flag
-                     || pfound->val != p->val)
-              /* Second or later nonexact match found.  */
-              ambig = 1;
-          }
-
-      if (ambig && !exact)
-        {
-          if (print_errors)
-            {
-#if defined _LIBC && defined USE_IN_LIBIO
-              char *buf;
-
-              __asprintf (&buf, _("%s: option `%s' is ambiguous\n"),
-                          argv[0], argv[optind]);
-
-              if (_IO_fwide (stderr, 0) > 0)
-                __fwprintf (stderr, L"%s", buf);
-              else
-                fputs (buf, stderr);
-
-              free (buf);
-#else
-              fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
-                       argv[0], argv[optind]);
-#endif
-            }
-          nextchar += strlen (nextchar);
-          optind++;
-          optopt = 0;
-          return '?';
-        }
-
-      if (pfound != NULL)
-        {
-          option_index = indfound;
-          optind++;
-          if (*nameend)
-            {
-              /* Don't test has_arg with >, because some C compilers don't
-                 allow it to be used on enums.  */
-              if (pfound->has_arg)
-                optarg = nameend + 1;
-              else
-                {
-                  if (print_errors)
-                    {
-#if defined _LIBC && defined USE_IN_LIBIO
-                      char *buf;
-#endif
-
-                      if (argv[optind - 1][1] == '-')
-                        {
-                          /* --option */
-#if defined _LIBC && defined USE_IN_LIBIO
-                          __asprintf (&buf, _("\
-%s: option `--%s' doesn't allow an argument\n"),
-                                      argv[0], pfound->name);
-#else
-                          fprintf (stderr, _("\
-%s: option `--%s' doesn't allow an argument\n"),
-                                   argv[0], pfound->name);
-#endif
-                        }
-                      else
-                        {
-                          /* +option or -option */
-#if defined _LIBC && defined USE_IN_LIBIO
-                          __asprintf (&buf, _("\
-%s: option `%c%s' doesn't allow an argument\n"),
-                                      argv[0], argv[optind - 1][0],
-                                      pfound->name);
-#else
-                          fprintf (stderr, _("\
-%s: option `%c%s' doesn't allow an argument\n"),
-                                   argv[0], argv[optind - 1][0], pfound->name);
-#endif
-                        }
-
-#if defined _LIBC && defined USE_IN_LIBIO
-                      if (_IO_fwide (stderr, 0) > 0)
-                        __fwprintf (stderr, L"%s", buf);
-                      else
-                        fputs (buf, stderr);
-
-                      free (buf);
-#endif
-                    }
-
-                  nextchar += strlen (nextchar);
-
-                  optopt = pfound->val;
-                  return '?';
-                }
-            }
-          else if (pfound->has_arg == 1)
-            {
-              if (optind < argc)
-                optarg = argv[optind++];
-              else
-                {
-                  if (print_errors)
-                    {
-#if defined _LIBC && defined USE_IN_LIBIO
-                      char *buf;
-
-                      __asprintf (&buf,
-                                  _("%s: option `%s' requires an argument\n"),
-                                  argv[0], argv[optind - 1]);
-
-                      if (_IO_fwide (stderr, 0) > 0)
-                        __fwprintf (stderr, L"%s", buf);
-                      else
-                        fputs (buf, stderr);
-
-                      free (buf);
-#else
-                      fprintf (stderr,
-                               _("%s: option `%s' requires an argument\n"),
-                               argv[0], argv[optind - 1]);
-#endif
-                    }
-                  nextchar += strlen (nextchar);
-                  optopt = pfound->val;
-                  return optstring[0] == ':' ? ':' : '?';
-                }
-            }
-          nextchar += strlen (nextchar);
-          if (longind != NULL)
-            *longind = option_index;
-          if (pfound->flag)
-            {
-              *(pfound->flag) = pfound->val;
-              return 0;
-            }
-          return pfound->val;
-        }
-
-      /* Can't find it as a long option.  If this is not getopt_long_only,
-         or the option starts with '--' or is not a valid short
-         option, then it's an error.
-         Otherwise interpret it as a short option.  */
-      if (!long_only || argv[optind][1] == '-'
-          || my_index (optstring, *nextchar) == NULL)
-        {
-          if (print_errors)
-            {
-#if defined _LIBC && defined USE_IN_LIBIO
-              char *buf;
-#endif
-
-              if (argv[optind][1] == '-')
-                {
-                  /* --option */
-#if defined _LIBC && defined USE_IN_LIBIO
-                  __asprintf (&buf, _("%s: unrecognized option `--%s'\n"),
-                              argv[0], nextchar);
-#else
-                  fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
-                           argv[0], nextchar);
-#endif
-                }
-              else
-                {
-                  /* +option or -option */
-#if defined _LIBC && defined USE_IN_LIBIO
-                  __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"),
-                              argv[0], argv[optind][0], nextchar);
-#else
-                  fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
-                           argv[0], argv[optind][0], nextchar);
-#endif
-                }
-
-#if defined _LIBC && defined USE_IN_LIBIO
-              if (_IO_fwide (stderr, 0) > 0)
-                __fwprintf (stderr, L"%s", buf);
-              else
-                fputs (buf, stderr);
-
-              free (buf);
-#endif
-            }
-          nextchar = (char *) "";
-          optind++;
-          optopt = 0;
-          return '?';
-        }
-    }
-
-  /* Look at and handle the next short option-character.  */
-
-  {
-    char c = *nextchar++;
-    char *temp = my_index (optstring, c);
-
-    /* Increment `optind' when we start to process its last character.  */
-    if (*nextchar == '\0')
-      ++optind;
-
-    if (temp == NULL || c == ':')
-      {
-        if (print_errors)
-          {
-#if defined _LIBC && defined USE_IN_LIBIO
-              char *buf;
-#endif
-
-            if (posixly_correct)
-              {
-                /* 1003.2 specifies the format of this message.  */
-#if defined _LIBC && defined USE_IN_LIBIO
-                __asprintf (&buf, _("%s: illegal option -- %c\n"),
-                            argv[0], c);
-#else
-                fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c);
-#endif
-              }
-            else
-              {
-#if defined _LIBC && defined USE_IN_LIBIO
-                __asprintf (&buf, _("%s: invalid option -- %c\n"),
-                            argv[0], c);
-#else
-                fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c);
-#endif
-              }
-
-#if defined _LIBC && defined USE_IN_LIBIO
-            if (_IO_fwide (stderr, 0) > 0)
-              __fwprintf (stderr, L"%s", buf);
-            else
-              fputs (buf, stderr);
-
-            free (buf);
-#endif
-          }
-        optopt = c;
-        return '?';
-      }
-    /* Convenience. Treat POSIX -W foo same as long option --foo */
-    if (temp[0] == 'W' && temp[1] == ';')
-      {
-        char *nameend;
-        const struct option *p;
-        const struct option *pfound = NULL;
-        int exact = 0;
-        int ambig = 0;
-        int indfound = 0;
-        int option_index;
-
-        /* This is an option that requires an argument.  */
-        if (*nextchar != '\0')
-          {
-            optarg = nextchar;
-            /* If we end this ARGV-element by taking the rest as an arg,
-               we must advance to the next element now.  */
-            optind++;
-          }
-        else if (optind == argc)
-          {
-            if (print_errors)
-              {
-                /* 1003.2 specifies the format of this message.  */
-#if defined _LIBC && defined USE_IN_LIBIO
-                char *buf;
-
-                __asprintf (&buf, _("%s: option requires an argument -- %c\n"),
-                            argv[0], c);
-
-                if (_IO_fwide (stderr, 0) > 0)
-                  __fwprintf (stderr, L"%s", buf);
-                else
-                  fputs (buf, stderr);
-
-                free (buf);
-#else
-                fprintf (stderr, _("%s: option requires an argument -- %c\n"),
-                         argv[0], c);
-#endif
-              }
-            optopt = c;
-            if (optstring[0] == ':')
-              c = ':';
-            else
-              c = '?';
-            return c;
-          }
-        else
-          /* We already incremented `optind' once;
-             increment it again when taking next ARGV-elt as argument.  */
-          optarg = argv[optind++];
-
-        /* optarg is now the argument, see if it's in the
-           table of longopts.  */
-
-        for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
-          /* Do nothing.  */ ;
-
-        /* Test all long options for either exact match
-           or abbreviated matches.  */
-        for (p = longopts, option_index = 0; p->name; p++, option_index++)
-          if (!strncmp (p->name, nextchar, nameend - nextchar))
-            {
-              if ((unsigned int) (nameend - nextchar) == strlen (p->name))
-                {
-                  /* Exact match found.  */
-                  pfound = p;
-                  indfound = option_index;
-                  exact = 1;
-                  break;
-                }
-              else if (pfound == NULL)
-                {
-                  /* First nonexact match found.  */
-                  pfound = p;
-                  indfound = option_index;
-                }
-              else
-                /* Second or later nonexact match found.  */
-                ambig = 1;
-            }
-        if (ambig && !exact)
-          {
-            if (print_errors)
-              {
-#if defined _LIBC && defined USE_IN_LIBIO
-                char *buf;
-
-                __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"),
-                            argv[0], argv[optind]);
-
-                if (_IO_fwide (stderr, 0) > 0)
-                  __fwprintf (stderr, L"%s", buf);
-                else
-                  fputs (buf, stderr);
-
-                free (buf);
-#else
-                fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
-                         argv[0], argv[optind]);
-#endif
-              }
-            nextchar += strlen (nextchar);
-            optind++;
-            return '?';
-          }
-        if (pfound != NULL)
-          {
-            option_index = indfound;
-            if (*nameend)
-              {
-                /* Don't test has_arg with >, because some C compilers don't
-                   allow it to be used on enums.  */
-                if (pfound->has_arg)
-                  optarg = nameend + 1;
-                else
-                  {
-                    if (print_errors)
-                      {
-#if defined _LIBC && defined USE_IN_LIBIO
-                        char *buf;
-
-                        __asprintf (&buf, _("\
-%s: option `-W %s' doesn't allow an argument\n"),
-                                    argv[0], pfound->name);
-
-                        if (_IO_fwide (stderr, 0) > 0)
-                          __fwprintf (stderr, L"%s", buf);
-                        else
-                          fputs (buf, stderr);
-
-                        free (buf);
-#else
-                        fprintf (stderr, _("\
-%s: option `-W %s' doesn't allow an argument\n"),
-                                 argv[0], pfound->name);
-#endif
-                      }
-
-                    nextchar += strlen (nextchar);
-                    return '?';
-                  }
-              }
-            else if (pfound->has_arg == 1)
-              {
-                if (optind < argc)
-                  optarg = argv[optind++];
-                else
-                  {
-                    if (print_errors)
-                      {
-#if defined _LIBC && defined USE_IN_LIBIO
-                        char *buf;
-
-                        __asprintf (&buf, _("\
-%s: option `%s' requires an argument\n"),
-                                    argv[0], argv[optind - 1]);
-
-                        if (_IO_fwide (stderr, 0) > 0)
-                          __fwprintf (stderr, L"%s", buf);
-                        else
-                          fputs (buf, stderr);
-
-                        free (buf);
-#else
-                        fprintf (stderr,
-                                 _("%s: option `%s' requires an argument\n"),
-                                 argv[0], argv[optind - 1]);
-#endif
-                      }
-                    nextchar += strlen (nextchar);
-                    return optstring[0] == ':' ? ':' : '?';
-                  }
-              }
-            nextchar += strlen (nextchar);
-            if (longind != NULL)
-              *longind = option_index;
-            if (pfound->flag)
-              {
-                *(pfound->flag) = pfound->val;
-                return 0;
-              }
-            return pfound->val;
-          }
-          nextchar = NULL;
-          return 'W';   /* Let the application handle it.   */
-      }
-    if (temp[1] == ':')
-      {
-        if (temp[2] == ':')
-          {
-            /* This is an option that accepts an argument optionally.  */
-            if (*nextchar != '\0')
-              {
-                optarg = nextchar;
-                optind++;
-              }
-            else
-              optarg = NULL;
-            nextchar = NULL;
-          }
-        else
-          {
-            /* This is an option that requires an argument.  */
-            if (*nextchar != '\0')
-              {
-                optarg = nextchar;
-                /* If we end this ARGV-element by taking the rest as an arg,
-                   we must advance to the next element now.  */
-                optind++;
-              }
-            else if (optind == argc)
-              {
-                if (print_errors)
-                  {
-                    /* 1003.2 specifies the format of this message.  */
-#if defined _LIBC && defined USE_IN_LIBIO
-                    char *buf;
-
-                    __asprintf (&buf,
-                                _("%s: option requires an argument -- %c\n"),
-                                argv[0], c);
-
-                    if (_IO_fwide (stderr, 0) > 0)
-                      __fwprintf (stderr, L"%s", buf);
-                    else
-                      fputs (buf, stderr);
-
-                    free (buf);
-#else
-                    fprintf (stderr,
-                             _("%s: option requires an argument -- %c\n"),
-                             argv[0], c);
-#endif
-                  }
-                optopt = c;
-                if (optstring[0] == ':')
-                  c = ':';
-                else
-                  c = '?';
-              }
-            else
-              /* We already incremented `optind' once;
-                 increment it again when taking next ARGV-elt as argument.  */
-              optarg = argv[optind++];
-            nextchar = NULL;
-          }
-      }
-    return c;
   }
 }
 
-int
-getopt (argc, argv, optstring)
-     int argc;
-     char *const *argv;
-     const char *optstring;
+#ifdef REPLACE_GETOPT
+/*
+ * getopt --
+ *	Parse argc/argv argument vector.
+ *
+ * [eventually this will replace the BSD getopt]
+ */
+int getopt(int nargc, char* const* nargv, const char* options)
 {
-  return _getopt_internal (argc, argv, optstring,
-                           (const struct option *) 0,
-                           (int *) 0,
-                           0);
+
+  /*
+	 * We don't pass FLAG_PERMUTE to getopt_internal() since
+	 * the BSD getopt(3) (unlike GNU) has never done this.
+	 *
+	 * Furthermore, since many privileged programs call getopt()
+	 * before dropping privileges it makes sense to keep things
+	 * as simple (and bug-free) as possible.
+	 */
+  return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
+}
+#endif /* REPLACE_GETOPT */
+
+//extern int getopt(int nargc, char * const *nargv, const char *options);
+
+#ifdef __cplusplus
+}
+#endif
+/*
+ * POSIX requires the `getopt' API to be specified in `unistd.h';
+ * thus, `unistd.h' includes this header.  However, we do not want
+ * to expose the `getopt_long' or `getopt_long_only' APIs, when
+ * included in this manner.  Thus, close the standard __GETOPT_H__
+ * declarations block, and open an additional __GETOPT_LONG_H__
+ * specific block, only when *not* __UNISTD_H_SOURCED__, in which
+ * to declare the extended API.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct option /* specification for a long form option...	*/
+{
+  const char* name; /* option name, without leading hyphens */
+  int has_arg; /* does it take an argument?		*/
+  int* flag; /* where to save its status, or NULL	*/
+  int val; /* its associated status value		*/
+};
+
+enum /* permitted values for its `has_arg' field...	*/
+{
+  no_argument = 0, /* option never takes an argument	*/
+  required_argument, /* option always requires an argument	*/
+  optional_argument /* option may take an argument		*/
+};
+
+/*
+ * parse_long_options --
+ *	Parse long options in argc/argv argument vector.
+ * Returns -1 if short_too is set and the option does not match long_options.
+ */
+static int
+parse_long_options(char* const* nargv, const char* options,
+    const struct option* long_options, int* idx, int short_too)
+{
+  char *current_argv, *has_equal;
+  size_t current_argv_len;
+  int i, ambiguous, match;
+
+#define IDENTICAL_INTERPRETATION(_x, _y) \
+  (long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag && long_options[(_x)].val == long_options[(_y)].val)
+
+  current_argv = place;
+  match = -1;
+  ambiguous = 0;
+
+  optind++;
+
+  if ((has_equal = strchr(current_argv, '=')) != NULL) {
+    /* argument found (--option=arg) */
+    current_argv_len = has_equal - current_argv;
+    has_equal++;
+  } else
+    current_argv_len = strlen(current_argv);
+
+  for (i = 0; long_options[i].name; i++) {
+    /* find matching long option */
+    if (strncmp(current_argv, long_options[i].name,
+            current_argv_len))
+      continue;
+
+    if (strlen(long_options[i].name) == current_argv_len) {
+      /* exact match */
+      match = i;
+      ambiguous = 0;
+      break;
+    }
+    /*
+		 * If this is a known short option, don't allow
+		 * a partial match of a single character.
+		 */
+    if (short_too && current_argv_len == 1)
+      continue;
+
+    if (match == -1) /* partial match */
+      match = i;
+    else if (!IDENTICAL_INTERPRETATION(i, match))
+      ambiguous = 1;
+  }
+  if (ambiguous) {
+    /* ambiguous abbreviation */
+    if (PRINT_ERROR)
+      warnx(ambig, (int)current_argv_len,
+          current_argv);
+    optopt = 0;
+    return (BADCH);
+  }
+  if (match != -1) { /* option found */
+    if (long_options[match].has_arg == no_argument
+        && has_equal) {
+      if (PRINT_ERROR)
+        warnx(noarg, (int)current_argv_len,
+            current_argv);
+      /*
+			 * XXX: GNU sets optopt to val regardless of flag
+			 */
+      if (long_options[match].flag == NULL)
+        optopt = long_options[match].val;
+      else
+        optopt = 0;
+      return (BADARG);
+    }
+    if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument) {
+      if (has_equal)
+        optarg = has_equal;
+      else if (long_options[match].has_arg == required_argument) {
+        /*
+				 * optional argument doesn't use next nargv
+				 */
+        optarg = nargv[optind++];
+      }
+    }
+    if ((long_options[match].has_arg == required_argument)
+        && (optarg == NULL)) {
+      /*
+			 * Missing argument; leading ':' indicates no error
+			 * should be generated.
+			 */
+      if (PRINT_ERROR)
+        warnx(recargstring,
+            current_argv);
+      /*
+			 * XXX: GNU sets optopt to val regardless of flag
+			 */
+      if (long_options[match].flag == NULL)
+        optopt = long_options[match].val;
+      else
+        optopt = 0;
+      --optind;
+      return (BADARG);
+    }
+  } else { /* unknown option */
+    if (short_too) {
+      --optind;
+      return (-1);
+    }
+    if (PRINT_ERROR)
+      warnx(illoptstring, current_argv);
+    optopt = 0;
+    return (BADCH);
+  }
+  if (idx)
+    *idx = match;
+  if (long_options[match].flag) {
+    *long_options[match].flag = long_options[match].val;
+    return (0);
+  } else
+    return (long_options[match].val);
+#undef IDENTICAL_INTERPRETATION
 }
 
-#endif  /* Not ELIDE_CODE.  */
-
-
-/* Compile with -DTEST to make an executable for use in testing
-   the above definition of `getopt'.  */
-
-/* #define TEST */        /* Pete Wilson mod 7/28/02 */
-#ifdef TEST
-
-#ifndef exit         /* Pete Wilson mod 7/28/02 */
-  int exit(int);     /* Pete Wilson mod 7/28/02 */
-#endif               /* Pete Wilson mod 7/28/02 */
-
-int
-main (argc, argv)
-     int argc;
-     char **argv;
+/*
+ * getopt_internal --
+ *	Parse argc/argv argument vector.  Called by user level routines.
+ */
+static int
+getopt_internal(int nargc, char* const* nargv, const char* options,
+    const struct option* long_options, int* idx, int flags)
 {
-  int c;
-  int digit_optind = 0;
+  char* oli; /* option letter list index */
+  int optchar, short_too;
+  static int posixly_correct = -1;
 
-  while (1)
-    {
-      int this_option_optind = optind ? optind : 1;
+  if (options == NULL)
+    return (-1);
 
-      c = getopt (argc, argv, "abc:d:0123456789");
-      if (c == -1)
-        break;
+  /*
+	 * XXX Some GNU programs (like cvs) set optind to 0 instead of
+	 * XXX using optreset.  Work around this braindamage.
+	 */
+  if (optind == 0)
+    optind = optreset = 1;
 
-      switch (c)
-        {
-        case '0':
-        case '1':
-        case '2':
-        case '3':
-        case '4':
-        case '5':
-        case '6':
-        case '7':
-        case '8':
-        case '9':
-          if (digit_optind != 0 && digit_optind != this_option_optind)
-            printf ("digits occur in two different argv-elements.\n");
-          digit_optind = this_option_optind;
-          printf ("option %c\n", c);
-          break;
+  /*
+	 * Disable GNU extensions if POSIXLY_CORRECT is set or options
+	 * string begins with a '+'.
+	 *
+	 * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
+	 *                 optreset != 0 for GNU compatibility.
+	 */
+  if (posixly_correct == -1 || optreset != 0)
+    posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
+  if (*options == '-')
+    flags |= FLAG_ALLARGS;
+  else if (posixly_correct || *options == '+')
+    flags &= ~FLAG_PERMUTE;
+  if (*options == '+' || *options == '-')
+    options++;
 
-        case 'a':
-          printf ("option a\n");
-          break;
-
-        case 'b':
-          printf ("option b\n");
-          break;
-
-        case 'c':
-          printf ("option c with value `%s'\n", optarg);
-          break;
-
-        case '?':
-          break;
-
-        default:
-          printf ("?? getopt returned character code 0%o ??\n", c);
-        }
+  optarg = NULL;
+  if (optreset)
+    nonopt_start = nonopt_end = -1;
+start:
+  if (optreset || !*place) { /* update scanning pointer */
+    optreset = 0;
+    if (optind >= nargc) { /* end of argument vector */
+      place = EMSG;
+      if (nonopt_end != -1) {
+        /* do permutation, if we have to */
+        permute_args(nonopt_start, nonopt_end,
+            optind, nargv);
+        optind -= nonopt_end - nonopt_start;
+      } else if (nonopt_start != -1) {
+        /*
+				 * If we skipped non-options, set optind
+				 * to the first of them.
+				 */
+        optind = nonopt_start;
+      }
+      nonopt_start = nonopt_end = -1;
+      return (-1);
     }
-
-  if (optind < argc)
-    {
-      printf ("non-option ARGV-elements: ");
-      while (optind < argc)
-        printf ("%s ", argv[optind++]);
-      printf ("\n");
+    if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL)) {
+      place = EMSG; /* found non-option */
+      if (flags & FLAG_ALLARGS) {
+        /*
+				 * GNU extension:
+				 * return non-option as argument to option 1
+				 */
+        optarg = nargv[optind++];
+        return (INORDER);
+      }
+      if (!(flags & FLAG_PERMUTE)) {
+        /*
+				 * If no permutation wanted, stop parsing
+				 * at first non-option.
+				 */
+        return (-1);
+      }
+      /* do permutation */
+      if (nonopt_start == -1)
+        nonopt_start = optind;
+      else if (nonopt_end != -1) {
+        permute_args(nonopt_start, nonopt_end,
+            optind, nargv);
+        nonopt_start = optind - (nonopt_end - nonopt_start);
+        nonopt_end = -1;
+      }
+      optind++;
+      /* process next argument */
+      goto start;
     }
+    if (nonopt_start != -1 && nonopt_end == -1)
+      nonopt_end = optind;
 
-  exit (0);
+    /*
+		 * If we have "-" do nothing, if "--" we are done.
+		 */
+    if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
+      optind++;
+      place = EMSG;
+      /*
+			 * We found an option (--), so if we skipped
+			 * non-options, we have to permute.
+			 */
+      if (nonopt_end != -1) {
+        permute_args(nonopt_start, nonopt_end,
+            optind, nargv);
+        optind -= nonopt_end - nonopt_start;
+      }
+      nonopt_start = nonopt_end = -1;
+      return (-1);
+    }
+  }
+
+  /*
+	 * Check long options if:
+	 *  1) we were passed some
+	 *  2) the arg is not just "-"
+	 *  3) either the arg starts with -- or we are getopt_long_only()
+	 */
+  if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY))) {
+    short_too = 0;
+    if (*place == '-')
+      place++; /* --foo long option */
+    else if (*place != ':' && strchr(options, *place) != NULL)
+      short_too = 1; /* could be short option too */
+
+    optchar = parse_long_options(nargv, options, long_options,
+        idx, short_too);
+    if (optchar != -1) {
+      place = EMSG;
+      return (optchar);
+    }
+  }
+
+  if ((optchar = (int)*place++) == (int)':' || (optchar == (int)'-' && *place != '\0') || (oli = (char*)strchr(options, optchar)) == NULL) {
+    /*
+		 * If the user specified "-" and  '-' isn't listed in
+		 * options, return -1 (non-option) as per POSIX.
+		 * Otherwise, it is an unknown option character (or ':').
+		 */
+    if (optchar == (int)'-' && *place == '\0')
+      return (-1);
+    if (!*place)
+      ++optind;
+    if (PRINT_ERROR)
+      warnx(illoptchar, optchar);
+    optopt = optchar;
+    return (BADCH);
+  }
+  if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
+    /* -W long-option */
+    if (*place) /* no space */
+      /* NOTHING */;
+    else if (++optind >= nargc) { /* no arg */
+      place = EMSG;
+      if (PRINT_ERROR)
+        warnx(recargchar, optchar);
+      optopt = optchar;
+      return (BADARG);
+    } else /* white space */
+      place = nargv[optind];
+    optchar = parse_long_options(nargv, options, long_options,
+        idx, 0);
+    place = EMSG;
+    return (optchar);
+  }
+  if (*++oli != ':') { /* doesn't take argument */
+    if (!*place)
+      ++optind;
+  } else { /* takes (optional) argument */
+    optarg = NULL;
+    if (*place) /* no white space */
+      optarg = place;
+    else if (oli[1] != ':') { /* arg not optional */
+      if (++optind >= nargc) { /* no arg */
+        place = EMSG;
+        if (PRINT_ERROR)
+          warnx(recargchar, optchar);
+        optopt = optchar;
+        return (BADARG);
+      } else
+        optarg = nargv[optind];
+    }
+    place = EMSG;
+    ++optind;
+  }
+  /* dump back option letter */
+  return (optchar);
 }
 
-#endif /* TEST */
+/*
+ * getopt_long --
+ *	Parse argc/argv argument vector.
+ */
+int getopt_long(int nargc, char* const* nargv, const char* options,
+    const struct option* long_options, int* idx)
+{
+
+  return (getopt_internal(nargc, nargv, options, long_options, idx,
+      FLAG_PERMUTE));
+}
+
+/*
+ * getopt_long_only --
+ *	Parse argc/argv argument vector.
+ */
+int getopt_long_only(int nargc, char* const* nargv, const char* options,
+    const struct option* long_options, int* idx)
+{
+
+  return (getopt_internal(nargc, nargv, options, long_options, idx,
+      FLAG_PERMUTE | FLAG_LONGONLY));
+}
+
+//extern int getopt_long(int nargc, char * const *nargv, const char *options,
+//    const struct option *long_options, int *idx);
+//extern int getopt_long_only(int nargc, char * const *nargv, const char *options,
+//    const struct option *long_options, int *idx);
+/*
+ * Previous MinGW implementation had...
+ */
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/getopt.h b/src/getopt.h
index f3696d95..431ba864 100644
--- a/src/getopt.h
+++ b/src/getopt.h
@@ -1,133 +1,228 @@
-/* Declarations for getopt.
-   Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
+#ifdef _WIN32
+#ifndef __GETOPT_H__
+/**
+ * DISCLAIMER
+ * This file is part of the mingw-w64 runtime package.
+ *
+ * The mingw-w64 runtime package and its code is distributed in the hope that it
+ * will be useful but WITHOUT ANY WARRANTY.  ALL WARRANTIES, EXPRESSED OR
+ * IMPLIED ARE HEREBY DISCLAIMED.  This includes but is not limited to
+ * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+/*
+ * Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ */
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Dieter Baron and Thomas Klausner.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
-This file is part of the GNU C Library.  Its master source is NOT part of
-the C library, however.  The master source lives in /gd/gnu/lib.
+#define __GETOPT_H__
 
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+/* All the headers include this file. */
+#include <crtdefs.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
 
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.  */
-
-#ifndef _GETOPT_H
-#define _GETOPT_H 1
-
-#ifdef	__cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-/* For communication from `getopt' to the caller.
-   When `getopt' finds an option that takes an argument,
-   the argument value is returned here.
-   Also, when `ordering' is RETURN_IN_ORDER,
-   each non-option ARGV-element is returned here.  */
+#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
 
-extern char *optarg;
+//extern int optind;		/* index of first non-option in argv      */
+//extern int optopt;		/* single option character, as parsed     */
+//extern int opterr;		/* flag to enable built-in diagnostics... */
+//				/* (user may set to zero, to suppress)    */
+//
+//extern char *optarg;		/* pointer to argument of current option  */
 
-/* Index in ARGV of the next element to be scanned.
-   This is used for communication to and from the caller
-   and for communication between successive calls to `getopt'.
+#define PRINT_ERROR ((opterr) && (*options != ':'))
 
-   On entry to `getopt', zero means this is the first call; initialize.
+#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
+#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
+#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
 
-   When `getopt' returns EOF, this is the index of the first of the
-   non-option elements that the caller should itself scan.
+/* return values */
+#define BADCH (int)'?'
+#define BADARG ((*options == ':') ? (int)':' : (int)'?')
+#define INORDER (int)1
 
-   Otherwise, `optind' communicates from one call to the next
-   how much of ARGV has been scanned so far.  */
-
-extern int optind;
-
-/* Callers store zero here to inhibit the error message `getopt' prints
-   for unrecognized options.  */
-
-extern int opterr;
-
-/* Set to an option character which was unrecognized.  */
-
-extern int optopt;
-
-/* Describe the long-named options requested by the application.
-   The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
-   of `struct option' terminated by an element containing a name which is
-   zero.
-
-   The field `has_arg' is:
-   no_argument		(or 0) if the option does not take an argument,
-   required_argument	(or 1) if the option requires an argument,
-   optional_argument 	(or 2) if the option takes an optional argument.
-
-   If the field `flag' is not NULL, it points to a variable that is set
-   to the value given in the field `val' when the option is found, but
-   left unchanged if the option is not found.
-
-   To have a long-named option do something other than set an `int' to
-   a compiled-in constant, such as set a value from `optarg', set the
-   option's `flag' field to zero and its `val' field to a nonzero
-   value (the equivalent single-letter option character, if there is
-   one).  For long options that have a zero `flag' field, `getopt'
-   returns the contents of the `val' field.  */
-
-struct option
-{
-#if defined (__STDC__) && __STDC__
-  const char *name;
+#ifndef __CYGWIN__
+#define __progname __argv[0]
 #else
-  char *name;
+extern char __declspec(dllimport) * __progname;
 #endif
-  /* has_arg can't be an enum because some compilers complain about
-     type mismatches in all the code that assumes it is an int.  */
-  int has_arg;
-  int *flag;
-  int val;
-};
 
-/* Names for the values of the `has_arg' field of `struct option'.  */
+#ifdef __CYGWIN__
+static char EMSG[] = "";
+#else
+#define EMSG ""
+#endif
 
-#define	no_argument		0
-#define required_argument	1
-#define optional_argument	2
+static int getopt_internal(int, char* const*, const char*,
+    const struct option*, int*, int);
+static int parse_long_options(char* const*, const char*,
+    const struct option*, int*, int);
+static int gcd(int, int);
+static void permute_args(int, int, int, char* const*);
 
-#if defined (__STDC__) && __STDC__
-#ifdef __GNU_LIBRARY__
-/* Many other libraries have conflicting prototypes for getopt, with
-   differences in the consts, in stdlib.h.  To avoid compilation
-   errors, only prototype getopt for the GNU C library.  */
-extern int getopt (int argc, char *const *argv, const char *shortopts);
-#else /* not __GNU_LIBRARY__ */
-extern int getopt ();
-#endif /* __GNU_LIBRARY__ */
-extern int getopt_long (int argc, char *const *argv, const char *shortopts,
-		        const struct option *longopts, int *longind);
-extern int getopt_long_only (int argc, char *const *argv,
-			     const char *shortopts,
-		             const struct option *longopts, int *longind);
+static char* place = EMSG; /* option letter processing */
 
-/* Internal only.  Users should not call this directly.  */
-extern int _getopt_internal (int argc, char *const *argv,
-			     const char *shortopts,
-		             const struct option *longopts, int *longind,
-			     int long_only);
-#else /* not __STDC__ */
-extern int getopt ();
-extern int getopt_long ();
-extern int getopt_long_only ();
+/* XXX: set optreset to 1 rather than these two */
+static int nonopt_start = -1; /* first non option argument (for permute) */
+static int nonopt_end = -1; /* first option after non options (for permute) */
 
-extern int _getopt_internal ();
-#endif /* __STDC__ */
+/* Error messages */
+static const char recargchar[] = "option requires an argument -- %c";
+static const char recargstring[] = "option requires an argument -- %s";
+static const char ambig[] = "ambiguous option -- %.*s";
+static const char noarg[] = "option doesn't take an argument -- %.*s";
+static const char illoptchar[] = "unknown option -- %c";
+static const char illoptstring[] = "unknown option -- %s";
 
-#ifdef	__cplusplus
+static void _vwarnx(const char* fmt, va_list ap);
+
+static void warnx(const char* fmt, ...);
+
+/*
+ * Compute the greatest common divisor of a and b.
+ */
+static int gcd(int a, int b);
+
+/*
+ * Exchange the block from nonopt_start to nonopt_end with the block
+ * from nonopt_end to opt_end (keeping the same order of arguments
+ * in each block).
+ */
+static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv);
+
+#ifdef REPLACE_GETOPT
+/*
+ * getopt --
+ *	Parse argc/argv argument vector.
+ *
+ * [eventually this will replace the BSD getopt]
+ */
+int getopt(int nargc, char* const* nargv, const char* options);
+#endif /* REPLACE_GETOPT */
+
+//extern int getopt(int nargc, char * const *nargv, const char *options);
+
+#ifdef _BSD_SOURCE
+/*
+ * BSD adds the non-standard `optreset' feature, for reinitialisation
+ * of `getopt' parsing.  We support this feature, for applications which
+ * proclaim their BSD heritage, before including this header; however,
+ * to maintain portability, developers are advised to avoid it.
+ */
+#define optreset __mingw_optreset
+extern int optreset;
+#endif
+#ifdef __cplusplus
+}
+#endif
+/*
+ * POSIX requires the `getopt' API to be specified in `unistd.h';
+ * thus, `unistd.h' includes this header.  However, we do not want
+ * to expose the `getopt_long' or `getopt_long_only' APIs, when
+ * included in this manner.  Thus, close the standard __GETOPT_H__
+ * declarations block, and open an additional __GETOPT_LONG_H__
+ * specific block, only when *not* __UNISTD_H_SOURCED__, in which
+ * to declare the extended API.
+ */
+#endif /* !defined(__GETOPT_H__) */
+
+#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
+#define __GETOPT_LONG_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * parse_long_options --
+ *	Parse long options in argc/argv argument vector.
+ * Returns -1 if short_too is set and the option does not match long_options.
+ */
+static int parse_long_options(char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too);
+
+/*
+ * getopt_internal --
+ *	Parse argc/argv argument vector.  Called by user level routines.
+ */
+static int getopt_internal(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags);
+
+/*
+ * getopt_long --
+ *	Parse argc/argv argument vector.
+ */
+int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
+
+/*
+ * getopt_long_only --
+ *	Parse argc/argv argument vector.
+ */
+int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
+
+/*
+ * Previous MinGW implementation had...
+ */
+#ifndef HAVE_DECL_GETOPT
+/*
+ * ...for the long form API only; keep this for compatibility.
+ */
+#define HAVE_DECL_GETOPT 1
+#endif
+
+#ifdef __cplusplus
 }
 #endif
 
-#endif /* _GETOPT_H */
+#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
+#endif
diff --git a/src/gettimeofday.c b/src/gettimeofday.c
index 7df6480d..13fc9f74 100644
--- a/src/gettimeofday.c
+++ b/src/gettimeofday.c
@@ -1,49 +1,43 @@
+#ifdef _WIN32
 #include "gettimeofday.h"
 
-int gettimeofday(struct timeval *tv, struct timezone *tz)
+LIB_API int gettimeofday(struct timeval* tp, struct timezone* tzp)
 {
-  FILETIME ft;
-  unsigned __int64 tmpres = 0;
-  static int tzflag;
- 
-  if (NULL != tv)
-  {
-    GetSystemTimeAsFileTime(&ft);
- 
-    tmpres |= ft.dwHighDateTime;
-    tmpres <<= 32;
-    tmpres |= ft.dwLowDateTime;
+  static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL); /* Unix epoch expressed in FILETIME ticks */
+  SYSTEMTIME system_time;
+  FILETIME file_time;
+  uint64_t time;
+  if (NULL == tp) return 0; /* the replaced code tolerated a NULL timeval; keep that instead of dereferencing it below */
  
+  GetSystemTime(&system_time);
+  SystemTimeToFileTime(&system_time, &file_time);
+  time = ((uint64_t)file_time.dwLowDateTime);
+  time += ((uint64_t)file_time.dwHighDateTime) << 32;
     /*converting file time to unix epoch*/
-    tmpres -= DELTA_EPOCH_IN_MICROSECS; 
-    tmpres /= 10;  /*convert into microseconds*/
-    tv->tv_sec = (long)(tmpres / 1000000UL);
-    tv->tv_usec = (long)(tmpres % 1000000UL);
-  }
- 
-  if (NULL != tz)
-  {
-    if (!tzflag)
-    {
-      _tzset();
-      tzflag++;
-    }
-    tz->tz_minuteswest = _timezone / 60;
-    tz->tz_dsttime = _daylight;
-  }
- 
+  tp->tv_sec = (long)((time - EPOCH) / 10000000L);
+  tp->tv_usec = (long)(system_time.wMilliseconds * 1000);
   return 0;
+  }
+ 
+LIB_API int clock_gettime(int dummy, struct timespec* ct)
+  {
+  LARGE_INTEGER count;
+
+  if (g_first_time) {
+    g_first_time = 0;
+
+    if (0 == QueryPerformanceFrequency(&g_counts_per_sec)) {
+      g_counts_per_sec.QuadPart = 0;
+    }
+  }
+ 
+  if ((NULL == ct) || (g_counts_per_sec.QuadPart <= 0) || (0 == QueryPerformanceCounter(&count))) {
+    return -1;
 }
 
-/* never worry about timersub type activies again -- from GLIBC and upcased. */
-int timersub(struct timeval *a, struct timeval *b, struct timeval *result)
-{                                                                
-         (result)->tv_sec = (a)->tv_sec - (b)->tv_sec;                        
-         (result)->tv_usec = (a)->tv_usec - (b)->tv_usec;                     
-         if ((result)->tv_usec < 0) {                                         
-           --(result)->tv_sec;                                                
-           (result)->tv_usec += 1000000;                                      
-         }                                                                         
+  ct->tv_sec = count.QuadPart / g_counts_per_sec.QuadPart;
+  ct->tv_nsec = ((count.QuadPart % g_counts_per_sec.QuadPart) * BILLION) / g_counts_per_sec.QuadPart;
 
     return 0;
-}
\ No newline at end of file
+}
+#endif
diff --git a/src/gettimeofday.h b/src/gettimeofday.h
index d38dfb5a..17d7a0f6 100644
--- a/src/gettimeofday.h
+++ b/src/gettimeofday.h
@@ -1,20 +1,40 @@
 #pragma once
-
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+#include <Winsock2.h>
+#include <stdint.h>
 #include < time.h >
-#include <windows.h> //I've ommited this line.
-#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
-  #define DELTA_EPOCH_IN_MICROSECS  11644473600000000Ui64
-#else
-  #define DELTA_EPOCH_IN_MICROSECS  11644473600000000ULL
+#include "darknet.h"
+
+#define CLOCK_REALTIME (1)
+#define BILLION (1E9)
+
+#ifndef timersub
+#define timersub(a, b, result)                       \
+  do {                                               \
+    (result)->tv_sec = (a)->tv_sec - (b)->tv_sec;    \
+    (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+    if ((result)->tv_usec < 0) {                     \
+      --(result)->tv_sec;                            \
+      (result)->tv_usec += 1000000;                  \
+    }                                                \
+  } while (0)
+#endif // timersub
+
+#ifdef __cplusplus
+extern "C" {
 #endif
  
-struct timezone 
-{
-  int  tz_minuteswest; /* minutes W of Greenwich */
-  int  tz_dsttime;     /* type of dst correction */
-};
- 
-int gettimeofday(struct timeval *tv, struct timezone *tz);
+static unsigned char g_first_time = 1;
+static LARGE_INTEGER g_counts_per_sec;
+
+LIB_API int gettimeofday(struct timeval*, struct timezone*);
+LIB_API int clock_gettime(int, struct timespec*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
 
-/* never worry about timersub type activies again -- from GLIBC and upcased. */
-int timersub(struct timeval *a, struct timeval *b, struct timeval *result);
\ No newline at end of file
diff --git a/src/go.c b/src/go.c
index 223d136d..2576ecef 100644
--- a/src/go.c
+++ b/src/go.c
@@ -5,13 +5,12 @@
 #include "blas.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 int inverted = 1;
 int noi = 1;
-//static const int nind = 5;
-#define nind 5
+static const unsigned int n_ind = 5;
 
 typedef struct {
     char **data;
@@ -22,7 +21,7 @@ char *fgetgo(FILE *fp)
 {
     if(feof(fp)) return 0;
     size_t size = 94;
-    char *line = malloc(size*sizeof(char));
+    char* line = (char*)malloc(size * sizeof(char));
     if(size != fread(line, sizeof(char), size, fp)){
         free(line);
         return 0;
@@ -35,21 +34,21 @@ moves load_go_moves(char *filename)
 {
     moves m;
     m.n = 128;
-    m.data = calloc(128, sizeof(char*));
+    m.data = (char**)calloc(128, sizeof(char*));
     FILE *fp = fopen(filename, "rb");
     int count = 0;
     char *line = 0;
     while((line = fgetgo(fp))){
         if(count >= m.n){
             m.n *= 2;
-            m.data = realloc(m.data, m.n*sizeof(char*));
+            m.data = (char**)realloc(m.data, m.n * sizeof(char*));
         }
         m.data[count] = line;
         ++count;
     }
     printf("%d\n", count);
     m.n = count;
-    m.data = realloc(m.data, count*sizeof(char*));
+    m.data = (char**)realloc(m.data, count * sizeof(char*));
     return m;
 }
 
@@ -127,12 +126,12 @@ void train_go(char *cfgfile, char *weightfile)
     }
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
 
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
 
     char buff[256];
-    float *board = calloc(19*19*net.batch, sizeof(float));
-    float *move = calloc(19*19*net.batch, sizeof(float));
-    moves m = load_go_moves("/home/pjreddie/backup/go.train");
+    float* board = (float*)calloc(19 * 19 * net.batch, sizeof(float));
+    float* move = (float*)calloc(19 * 19 * net.batch, sizeof(float));
+    moves m = load_go_moves("backup/go.train");
     //moves m = load_go_moves("games.txt");
 
     int N = m.n;
@@ -187,7 +186,7 @@ void propagate_liberty(float *board, int *lib, int *visited, int row, int col, i
 
 int *calculate_liberties(float *board)
 {
-    int *lib = calloc(19*19, sizeof(int));
+    int* lib = (int*)calloc(19 * 19, sizeof(int));
     int visited[361];
     int i, j;
     for(j = 0; j < 19; ++j){
@@ -222,7 +221,7 @@ void print_board(float *board, int swap, int *indexes)
             int index = j*19 + i;
             if(indexes){
                 int found = 0;
-                for(n = 0; n < nind; ++n){
+                for (n = 0; n < n_ind; ++n) {
                     if(index == indexes[n]){
                         found = 1;
                         /*
@@ -365,9 +364,9 @@ int generate_move(network net, int player, float *board, int multi, float thresh
         }
     }
 
-    int indexes[nind];
-    top_k(move, 19*19, nind, indexes);
-    if(thresh > move[indexes[0]]) thresh = move[indexes[nind-1]];
+    int indexes[n_ind];
+    top_k(move, 19*19, n_ind, indexes);
+    if(thresh > move[indexes[0]]) thresh = move[indexes[n_ind-1]];
 
     for(i = 0; i < 19; ++i){
         for(j = 0; j < 19; ++j){
@@ -382,12 +381,12 @@ int generate_move(network net, int player, float *board, int multi, float thresh
     int index = sample_array(move, 19*19);
 
     if(print){
-        top_k(move, 19*19, nind, indexes);
-        for(i = 0; i < nind; ++i){
+        top_k(move, 19*19, n_ind, indexes);
+        for(i = 0; i < n_ind; ++i){
             if (!move[indexes[i]]) indexes[i] = -1;
         }
         print_board(board, player, indexes);
-        for(i = 0; i < nind; ++i){
+        for(i = 0; i < n_ind; ++i){
             fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]);
         }
     }
@@ -411,9 +410,9 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
     set_batch_network(&net, 1);
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
 
-    float *board = calloc(19*19, sizeof(float));
-    float *move = calloc(19*19, sizeof(float));
-    moves m = load_go_moves("/home/pjreddie/backup/go.test");
+    float* board = (float*)calloc(19 * 19, sizeof(float));
+    float* move = (float*)calloc(19 * 19, sizeof(float));
+    moves m = load_go_moves("backup/go.test");
 
     int N = m.n;
     int i;
@@ -439,9 +438,9 @@ void engine_go(char *filename, char *weightfile, int multi)
     }
     srand(time(0));
     set_batch_network(&net, 1);
-    float *board = calloc(19*19, sizeof(float));
-    char *one = calloc(91, sizeof(char));
-    char *two = calloc(91, sizeof(char));
+    float* board = (float*)calloc(19 * 19, sizeof(float));
+    char* one = (char*)calloc(91, sizeof(char));
+    char* two = (char*)calloc(91, sizeof(char));
     int passed = 0;
     while(1){
         char buff[256];
@@ -612,8 +611,8 @@ void test_go(char *cfg, char *weights, int multi)
     }
     srand(time(0));
     set_batch_network(&net, 1);
-    float *board = calloc(19*19, sizeof(float));
-    float *move = calloc(19*19, sizeof(float));
+    float* board = (float*)calloc(19 * 19, sizeof(float));
+    float* move = (float*)calloc(19 * 19, sizeof(float));
     int color = 1;
     while(1){
         float *output = network_predict(net, board);
@@ -642,11 +641,11 @@ void test_go(char *cfg, char *weights, int multi)
             if(board[i]) move[i] = 0;
         }
 
-        int indexes[nind];
+        int indexes[n_ind];
         int row, col;
-        top_k(move, 19*19, nind, indexes);
+        top_k(move, 19 * 19, n_ind, indexes);
         print_board(board, color, indexes);
-        for(i = 0; i < nind; ++i){
+        for (i = 0; i < n_ind; ++i) {
             int index = indexes[i];
             row = index / 19;
             col = index % 19;
@@ -664,7 +663,7 @@ void test_go(char *cfg, char *weights, int multi)
         int cnum = sscanf(line, "%c", &c);
         if (strlen(line) == 0 || dnum) {
             --picked;
-            if (picked < nind){
+            if (picked < n_ind){
                 int index = indexes[picked];
                 row = index / 19;
                 col = index % 19;
@@ -764,9 +763,9 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
     int count = 0;
     set_batch_network(&net, 1);
     set_batch_network(&net2, 1);
-    float *board = calloc(19*19, sizeof(float));
-    char *one = calloc(91, sizeof(char));
-    char *two = calloc(91, sizeof(char));
+    float* board = (float*)calloc(19 * 19, sizeof(float));
+    char* one = (char*)calloc(91, sizeof(char));
+    char* two = (char*)calloc(91, sizeof(char));
     int done = 0;
     int player = 1;
     int p1 = 0;
diff --git a/src/gru_layer.c b/src/gru_layer.c
index 045bbb33..eac751a5 100644
--- a/src/gru_layer.c
+++ b/src/gru_layer.c
@@ -30,42 +30,42 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
 {
     fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs);
     batch = batch / steps;
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.batch = batch;
     l.type = GRU;
     l.steps = steps;
     l.inputs = inputs;
 
-    l.input_z_layer = malloc(sizeof(layer));
+    l.input_z_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.input_z_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.input_z_layer->batch = batch;
 
-    l.state_z_layer = malloc(sizeof(layer));
+    l.state_z_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.state_z_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.state_z_layer->batch = batch;
 
 
 
-    l.input_r_layer = malloc(sizeof(layer));
+    l.input_r_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.input_r_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.input_r_layer->batch = batch;
 
-    l.state_r_layer = malloc(sizeof(layer));
+    l.state_r_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.state_r_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.state_r_layer->batch = batch;
 
 
 
-    l.input_h_layer = malloc(sizeof(layer));
+    l.input_h_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.input_h_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.input_h_layer->batch = batch;
 
-    l.state_h_layer = malloc(sizeof(layer));
+    l.state_h_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.state_h_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.state_h_layer->batch = batch;
@@ -74,16 +74,16 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
 
 
     l.outputs = outputs;
-    l.output = calloc(outputs*batch*steps, sizeof(float));
-    l.delta = calloc(outputs*batch*steps, sizeof(float));
-    l.state = calloc(outputs*batch, sizeof(float));
-    l.prev_state = calloc(outputs*batch, sizeof(float));
-    l.forgot_state = calloc(outputs*batch, sizeof(float));
-    l.forgot_delta = calloc(outputs*batch, sizeof(float));
+    l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
+    l.delta = (float*)calloc(outputs * batch * steps, sizeof(float));
+    l.state = (float*)calloc(outputs * batch, sizeof(float));
+    l.prev_state = (float*)calloc(outputs * batch, sizeof(float));
+    l.forgot_state = (float*)calloc(outputs * batch, sizeof(float));
+    l.forgot_delta = (float*)calloc(outputs * batch, sizeof(float));
 
-    l.r_cpu = calloc(outputs*batch, sizeof(float));
-    l.z_cpu = calloc(outputs*batch, sizeof(float));
-    l.h_cpu = calloc(outputs*batch, sizeof(float));
+    l.r_cpu = (float*)calloc(outputs * batch, sizeof(float));
+    l.z_cpu = (float*)calloc(outputs * batch, sizeof(float));
+    l.h_cpu = (float*)calloc(outputs * batch, sizeof(float));
 
     l.forward = forward_gru_layer;
     l.backward = backward_gru_layer;
diff --git a/src/gru_layer.h b/src/gru_layer.h
index 9e19cee1..ae6b55f4 100644
--- a/src/gru_layer.h
+++ b/src/gru_layer.h
@@ -6,6 +6,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
 
 void forward_gru_layer(layer l, network_state state);
@@ -20,5 +23,8 @@ void push_gru_layer(layer l);
 void pull_gru_layer(layer l);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/http_stream.cpp b/src/http_stream.cpp
index 3eb76ac0..091a5aad 100644
--- a/src/http_stream.cpp
+++ b/src/http_stream.cpp
@@ -1,3 +1,4 @@
+#include "image.h"
 #include "http_stream.h"
 
 #ifdef OPENCV
@@ -17,9 +18,10 @@ using std::endl;
 // socket related abstractions:
 //
 #ifdef _WIN32
+#ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "ws2_32.lib")
-#include <winsock.h>
-#include <windows.h>
+#endif
+#include "gettimeofday.h"
 #include <time.h>
 #define PORT        unsigned long
 #define ADDRPOINTER   int*
@@ -44,7 +46,7 @@ static int close_socket(SOCKET s) {
     return result;
 }
 #else   // nix
-#include <unistd.h>
+#include "darkunistd.h"
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -85,16 +87,15 @@ static int close_socket(SOCKET s) {
 #endif // _WIN32
 
 
-#include "opencv2/opencv.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/opencv.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/imgproc/imgproc_c.h>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio.hpp"
+#include <opencv2/videoio/videoio.hpp>
 #endif
 using namespace cv;
 
-#include "image.h"
 
 
 class MJPG_sender
@@ -195,7 +196,8 @@ public:
         std::vector<int> params;
         params.push_back(IMWRITE_JPEG_QUALITY);
         params.push_back(quality);
-        cv::imencode(".jpg", frame, outbuf, params);
+        // Encode the frame as JPEG into outbuf; outbuf.size() on the next line becomes the payload length.
+        cv::imencode(".jpg", frame, outbuf, params);
         size_t outlen = outbuf.size();
 
 #ifdef _WIN32
@@ -227,17 +229,17 @@ public:
                 }
                 maxfd = (maxfd>client ? maxfd : client);
                 FD_SET(client, &master);
-                _write(client, "HTTP/1.0 200 OK\r\n", 0);
+                _write(client, "HTTP/1.0 200 OK\r\n", 0);
                 _write(client,
+                    "Server: Mozarella/2.2\r\n"
+                    "Accept-Range: bytes\r\n"
+                    "Connection: close\r\n"
+                    "Max-Age: 0\r\n"
+                    "Expires: 0\r\n"
+                    "Cache-Control: no-cache, private\r\n"
+                    "Pragma: no-cache\r\n"
+                    "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n"
+                    "\r\n", 0);
                 cerr << "MJPG_sender: new client " << client << endl;
             }
             else // existing client, just stream pix
@@ -249,7 +251,7 @@ public:
                 }
 
                 char head[400];
-                sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen);
+                sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen);
                 _write(s, head, 0);
                 int n = _write(s, (char*)(&outbuf[0]), outlen);
                 //cerr << "known client " << s << " " << n << endl;
@@ -406,18 +408,18 @@ public:
                 }
                 maxfd = (maxfd>client ? maxfd : client);
                 FD_SET(client, &master);
-                _write(client, "HTTP/1.0 200 OK\r\n", 0);
+                _write(client, "HTTP/1.0 200 OK\r\n", 0);
                 _write(client,
+                    "Server: Mozarella/2.2\r\n"
+                    "Accept-Range: bytes\r\n"
+                    "Connection: close\r\n"
+                    "Max-Age: 0\r\n"
+                    "Expires: 0\r\n"
+                    "Cache-Control: no-cache, private\r\n"
+                    "Pragma: no-cache\r\n"
+                    "Content-Type: application/json\r\n"
                     //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n"
+                    "\r\n", 0);
                 _write(client, "[\n", 0);   // open JSON array
                 int n = _write(client, outputbuf, outlen);
                 cerr << "JSON_sender: new client " << client << endl;
@@ -473,7 +475,7 @@ void send_json(detection *dets, int nboxes, int classes, char **names, long long
 
 // ----------------------------------------
 
-CvCapture* get_capture_video_stream(char *path) {
+CvCapture* get_capture_video_stream(const char *path) {
     CvCapture* cap = NULL;
     try {
         cap = (CvCapture*)new cv::VideoCapture(path);
@@ -511,7 +513,7 @@ IplImage* get_webcam_frame(CvCapture *cap) {
             src = cvCloneImage(&tmp);
         }
         else {
-            std::cout << " Video-stream stoped! \n";
+            std::cout << " Video-stream stopped! \n";
         }
     }
     catch (...) {
@@ -536,9 +538,6 @@ int get_stream_fps_cpp(CvCapture *cap) {
     return fps;
 }
 // ----------------------------------------
-extern "C" {
-    image ipl_to_image(IplImage* src);    // image.c
-}
 
 image image_data_augmentation(IplImage* ipl, int w, int h,
     int pleft, int ptop, int swidth, int sheight, int flip,
@@ -701,4 +700,4 @@ void stop_timer_and_show() {
 }
 void stop_timer_and_show_name(char *name) { stop_timer_and_show(); }
 void total_time() {}
-#endif // C++11
\ No newline at end of file
+#endif // C++11
diff --git a/src/http_stream.h b/src/http_stream.h
index d7ec815c..d96d7d4b 100644
--- a/src/http_stream.h
+++ b/src/http_stream.h
@@ -1,11 +1,14 @@
-#pragma once
 #ifndef HTTP_STREAM_H
 #define HTTP_STREAM_H
 #include "darknet.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
+#include <opencv2/core/version.hpp>
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/imgproc/imgproc_c.h>
+#ifndef CV_VERSION_EPOCH
+#include <opencv2/videoio/videoio_c.h>
+#endif
 #endif
 
 #ifdef __cplusplus
@@ -18,7 +21,7 @@ extern "C" {
 void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout);
 void send_mjpeg(IplImage* ipl, int port, int timeout, int quality);
 CvCapture* get_capture_webcam(int index);
-CvCapture* get_capture_video_stream(char *path);
+CvCapture* get_capture_video_stream(const char *path);
 IplImage* get_webcam_frame(CvCapture *cap);
 int get_stream_fps_cpp(CvCapture *cap);
 
diff --git a/src/im2col.h b/src/im2col.h
index ab4f3483..f510d61e 100644
--- a/src/im2col.h
+++ b/src/im2col.h
@@ -5,9 +5,14 @@
 #include <stdint.h>
 #include "darknet.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 void im2col_cpu(float* data_im,
         int channels, int height, int width,
         int ksize, int stride, int pad, float* data_col);
+float im2col_get_pixel(float* im, int height, int width, int channels,
+    int row, int col, int channel, int pad);
 
 #ifdef GPU
 
@@ -63,5 +68,8 @@ void convolve_bin_cpu(float *input, float *weights, float *output, int in_w, int
 
 void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
 
+#endif
+#ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/src/im2col_kernels.cu b/src/im2col_kernels.cu
index 1cb4cf34..876d6ce0 100644
--- a/src/im2col_kernels.cu
+++ b/src/im2col_kernels.cu
@@ -1,19 +1,15 @@
-﻿#include "cuda_runtime.h"
-#include "curand.h"
-#include "cublas_v2.h"
+﻿#include <cuda_runtime.h>
+#include <curand.h>
+#include <cublas_v2.h>
 #include <stdint.h>
 
-extern "C" {
 #include "im2col.h"
 #include "cuda.h"
-}
 
 #include <stdio.h>
 #include <assert.h>
-#include <cuda.h>
+//#include <cuda.h>
 
-#define FULL_MASK 0xffffffff
-#define WARP_SIZE 32
 
 template<typename T1, typename T2>
 __device__ inline T1 __shfl_custom(T1 val, T2 lane) {
@@ -154,11 +150,6 @@ __global__ void im2col_align_gpu_kernel(const int n, const float* data_im,
 {
     //__shared__ float tmp_s[1];
 
-//#define SHRED_VALS ((BLOCK / 169) * )
-    //__shared__ float dst_s[1024];
-    //__shared__ float dst_s[1024];
-    //__shared__ uint32_t bit_s[32];
-    //__shared__ uint8_t bit_s[128];
 
     int index = blockIdx.x*blockDim.x + threadIdx.x;
     for (; index < n; index += blockDim.x*gridDim.x) {
@@ -604,8 +595,7 @@ __device__ void transpose32_optimized(uint32_t A[32]) {
     }
 }
 
-#define BLOCK_TRANSPOSE32 256
-
+extern "C" {
 __device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n)
 {
     //unsigned A_tmp[32];
@@ -626,7 +616,7 @@ __device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B
     #pragma unroll 32
     for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i];
 }
-
+}
 
 // transpose 32x32 bit
 __global__ void transpose_bin_gpu_kernel_32(uint32_t *A, uint32_t *B, const int n, const int m,
diff --git a/src/image.c b/src/image.c
index f95b478a..31a1819d 100644
--- a/src/image.c
+++ b/src/image.c
@@ -1,4 +1,3 @@
-#include "darknet.h"
 #include "image.h"
 #include "utils.h"
 #include "blas.h"
@@ -6,25 +5,31 @@
 #include <stdio.h>
 #include <math.h>
 
+#ifndef STB_IMAGE_IMPLEMENTATION
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
+#endif
+#ifndef STB_IMAGE_WRITE_IMPLEMENTATION
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include "stb_image_write.h"
+#endif
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
-#include "opencv2/core/types_c.h"
-#include "opencv2/core/version.hpp"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/imgproc/imgproc_c.h>
+#include <opencv2/core/types_c.h>
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
-#include "opencv2/imgcodecs/imgcodecs_c.h"
+#include <opencv2/videoio/videoio_c.h>
+#include <opencv2/imgcodecs/imgcodecs_c.h>
 #include "http_stream.h"
 #endif
 #include "http_stream.h"
 
+#ifndef CV_RGB
 #define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
 #endif
+#endif
 
 extern int check_mistakes;
 int windows = 0;
@@ -255,9 +260,9 @@ image **load_alphabet()
 {
     int i, j;
     const int nsize = 8;
-    image **alphabets = calloc(nsize, sizeof(image));
+    image** alphabets = (image**)calloc(nsize, sizeof(image*));
     for(j = 0; j < nsize; ++j){
-        alphabets[j] = calloc(128, sizeof(image));
+        alphabets[j] = (image*)calloc(128, sizeof(image));
         for(i = 32; i < 127; ++i){
             char buff[256];
             sprintf(buff, "data/labels/%d_%d.png", i, j);
@@ -273,7 +278,7 @@ image **load_alphabet()
 detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names)
 {
     int selected_num = 0;
-    detection_with_class* result_arr = calloc(dets_num, sizeof(detection_with_class));
+    detection_with_class* result_arr = (detection_with_class*)calloc(dets_num, sizeof(detection_with_class));
     int i;
     for (i = 0; i < dets_num; ++i) {
         int best_class = -1;
@@ -505,7 +510,7 @@ void save_cv_png(IplImage *img, const char *name)
     IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
     cvCvtColor(img, img_rgb, CV_RGB2BGR);
     stbi_write_png(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 0);
-    cvRelease(&img_rgb);
+    cvReleaseImage(&img_rgb);
 }
 
 void save_cv_jpg(IplImage *img, const char *name)
@@ -513,7 +518,7 @@ void save_cv_jpg(IplImage *img, const char *name)
     IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
     cvCvtColor(img, img_rgb, CV_RGB2BGR);
     stbi_write_jpg(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 80);
-    cvRelease(&img_rgb);
+    cvReleaseImage(&img_rgb);
 }
 
 void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output)
@@ -952,8 +957,8 @@ void normalize_image(image p)
 
 void normalize_image2(image p)
 {
-    float *min = calloc(p.c, sizeof(float));
-    float *max = calloc(p.c, sizeof(float));
+    float* min = (float*)calloc(p.c, sizeof(float));
+    float* max = (float*)calloc(p.c, sizeof(float));
     int i,j;
     for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w];
 
@@ -982,7 +987,7 @@ void normalize_image2(image p)
 image copy_image(image p)
 {
     image copy = p;
-    copy.data = calloc(p.h*p.w*p.c, sizeof(float));
+    copy.data = (float*)calloc(p.h * p.w * p.c, sizeof(float));
     memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float));
     return copy;
 }
@@ -1252,7 +1257,7 @@ void save_image_png(image im, const char *name)
     char buff[256];
     //sprintf(buff, "%s (%d)", name, windows);
     sprintf(buff, "%s.png", name);
-    unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
+    unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
     int i,k;
     for(k = 0; k < im.c; ++k){
         for(i = 0; i < im.w*im.h; ++i){
@@ -1273,7 +1278,7 @@ void save_image_options(image im, const char *name, IMTYPE f, int quality)
     else if (f == TGA) sprintf(buff, "%s.tga", name);
     else if (f == JPG) sprintf(buff, "%s.jpg", name);
     else               sprintf(buff, "%s.png", name);
-    unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
+    unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
     int i, k;
     for (k = 0; k < im.c; ++k) {
         for (i = 0; i < im.w*im.h; ++i) {
@@ -1331,14 +1336,14 @@ image make_empty_image(int w, int h, int c)
 image make_image(int w, int h, int c)
 {
     image out = make_empty_image(w,h,c);
-    out.data = calloc(h*w*c, sizeof(float));
+    out.data = (float*)calloc(h * w * c, sizeof(float));
     return out;
 }
 
 image make_random_image(int w, int h, int c)
 {
     image out = make_empty_image(w,h,c);
-    out.data = calloc(h*w*c, sizeof(float));
+    out.data = (float*)calloc(h * w * c, sizeof(float));
     int i;
     for(i = 0; i < w*h*c; ++i){
         out.data[i] = (rand_normal() * .25) + .5;
@@ -1585,7 +1590,7 @@ image random_augment_image(image im, float angle, float aspect, int low, int hig
     int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
     float scale = (float)r / min;
 
-    float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
+    float rad = rand_uniform(-angle, angle) * 2.0 * M_PI / 360.;
 
     float dx = (im.w*scale/aspect - size) / 2.;
     float dy = (im.h*scale - size) / 2.;
diff --git a/src/image.h b/src/image.h
index 5c759b4d..20170932 100644
--- a/src/image.h
+++ b/src/image.h
@@ -7,8 +7,20 @@
 #include <float.h>
 #include <string.h>
 #include <math.h>
+#ifdef OPENCV
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/imgproc/imgproc_c.h>
+#include <opencv2/core/types_c.h>
+#include <opencv2/core/version.hpp>
+#ifndef CV_VERSION_EPOCH
+#include <opencv2/videoio/videoio_c.h>
+#include <opencv2/imgcodecs/imgcodecs_c.h>
+#endif
+#endif
 #include "box.h"
-
+#ifdef __cplusplus
+extern "C" {
+#endif
 /*
 typedef struct {
     int w;
@@ -80,6 +92,11 @@ image load_image(char *filename, int w, int h, int c);
 //LIB_API image load_image_color(char *filename, int w, int h);
 image **load_alphabet();
 
+#ifdef OPENCV
+LIB_API image get_image_from_stream(CvCapture* cap);
+LIB_API image get_image_from_stream_cpp(CvCapture* cap);
+LIB_API image ipl_to_image(IplImage* src);
+#endif
 //float get_pixel(image m, int x, int y, int c);
 //float get_pixel_extend(image m, int x, int y, int c);
 //void set_pixel(image m, int x, int y, int c, float val);
@@ -90,5 +107,8 @@ image get_image_layer(image m, int l);
 
 //LIB_API void free_image(image m);
 void test_resize(char *filename);
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/layer.c b/src/layer.c
index 6aeceec9..627b931d 100644
--- a/src/layer.c
+++ b/src/layer.c
@@ -98,7 +98,7 @@ void free_layer(layer l)
 	if (l.x_gpu)                   cuda_free(l.x_gpu);
 	if (l.x_norm_gpu)              cuda_free(l.x_norm_gpu);
 
-    if (l.align_bit_weights_gpu)   cuda_free(l.align_bit_weights_gpu);
+    if (l.align_bit_weights_gpu)   cuda_free((float *)l.align_bit_weights_gpu);
     if (l.mean_arr_gpu)            cuda_free(l.mean_arr_gpu);
     if (l.align_workspace_gpu)     cuda_free(l.align_workspace_gpu);
     if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu);
diff --git a/src/layer.h b/src/layer.h
index 692df88c..e92d3b4a 100644
--- a/src/layer.h
+++ b/src/layer.h
@@ -1,10 +1,12 @@
 #ifndef BASE_LAYER_H
 #define BASE_LAYER_H
 
-#include "darknet.h"
 #include "activations.h"
 #include "stddef.h"
 #include "tree.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 //struct network_state;
 
@@ -330,4 +332,7 @@ struct layer{
 */
 //void free_layer(layer);
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/list.c b/src/list.c
index 39e3033d..79fee08d 100644
--- a/src/list.c
+++ b/src/list.c
@@ -5,7 +5,7 @@
 
 list *make_list()
 {
-    list *l = malloc(sizeof(list));
+    list* l = (list*)malloc(sizeof(list));
     l->size = 0;
     l->front = 0;
     l->back = 0;
@@ -40,18 +40,18 @@ void *list_pop(list *l){
 
 void list_insert(list *l, void *val)
 {
-    node *new = malloc(sizeof(node));
-    new->val = val;
-    new->next = 0;
+    node* newnode = (node*)malloc(sizeof(node));
+    newnode->val = val;
+    newnode->next = 0;
 
     if(!l->back){
-        l->front = new;
-        new->prev = 0;
+        l->front = newnode;
+        newnode->prev = 0;
     }else{
-        l->back->next = new;
-        new->prev = l->back;
+        l->back->next = newnode;
+        newnode->prev = l->back;
     }
-    l->back = new;
+    l->back = newnode;
     ++l->size;
 }
 
@@ -84,7 +84,7 @@ void free_list_contents_kvp(list *l)
 {
     node *n = l->front;
     while (n) {
-        kvp *p = n->val;
+        kvp* p = (kvp*)n->val;
         free(p->key);
         free(n->val);
         n = n->next;
@@ -93,7 +93,7 @@ void free_list_contents_kvp(list *l)
 
 void **list_to_array(list *l)
 {
-    void **a = calloc(l->size, sizeof(void*));
+    void** a = (void**)calloc(l->size, sizeof(void*));
     int count = 0;
     node *n = l->front;
     while(n){
diff --git a/src/list.h b/src/list.h
index c06d1429..2623174a 100644
--- a/src/list.h
+++ b/src/list.h
@@ -13,6 +13,9 @@ typedef struct list{
     node *back;
 } list;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 list *make_list();
 int list_find(list *l, void *val);
 
@@ -24,4 +27,7 @@ void free_list(list *l);
 void free_list_contents(list *l);
 void free_list_contents_kvp(list *l);
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/local_layer.c b/src/local_layer.c
index 31f0ca6b..f304511f 100644
--- a/src/local_layer.c
+++ b/src/local_layer.c
@@ -26,7 +26,7 @@ int local_out_width(local_layer l)
 local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
 {
     int i;
-    local_layer l = {0};
+    local_layer l = { (LAYER_TYPE)0 };
     l.type = LOCAL;
 
     l.h = h;
@@ -47,19 +47,19 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = l.w * l.h * l.c;
 
-    l.weights = calloc(c*n*size*size*locations, sizeof(float));
-    l.weight_updates = calloc(c*n*size*size*locations, sizeof(float));
+    l.weights = (float*)calloc(c * n * size * size * locations, sizeof(float));
+    l.weight_updates = (float*)calloc(c * n * size * size * locations, sizeof(float));
 
-    l.biases = calloc(l.outputs, sizeof(float));
-    l.bias_updates = calloc(l.outputs, sizeof(float));
+    l.biases = (float*)calloc(l.outputs, sizeof(float));
+    l.bias_updates = (float*)calloc(l.outputs, sizeof(float));
 
     // float scale = 1./sqrt(size*size*c);
     float scale = sqrt(2./(size*size*c));
     for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);
 
-    l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
-    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
-    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));
+    l.col_image = (float*)calloc(out_h * out_w * size * size * c, sizeof(float));
+    l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
+    l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
     
     l.forward = forward_local_layer;
     l.backward = backward_local_layer;
diff --git a/src/local_layer.h b/src/local_layer.h
index 28915d81..198ca362 100644
--- a/src/local_layer.h
+++ b/src/local_layer.h
@@ -9,6 +9,9 @@
 
 typedef layer local_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 #ifdef GPU
 void forward_local_layer_gpu(local_layer layer, network_state state);
 void backward_local_layer_gpu(local_layer layer, network_state state);
@@ -27,5 +30,8 @@ void update_local_layer(local_layer layer, int batch, float learning_rate, float
 void bias_output(float *output, float *biases, int batch, int n, int size);
 void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/lstm_layer.c b/src/lstm_layer.c
index 657d500c..2894d3d3 100644
--- a/src/lstm_layer.c
+++ b/src/lstm_layer.c
@@ -30,7 +30,7 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
 {
     fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs);
     batch = batch / steps;
-    layer l = { 0 };
+    layer l = { (LAYER_TYPE)0 };
     l.batch = batch;
     l.type = LSTM;
     l.steps = steps;
@@ -39,49 +39,49 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
     l.out_h = 1;
     l.out_c = outputs;
 
-    l.uf = malloc(sizeof(layer));
+    l.uf = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.uf->batch = batch;
     if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
 
-    l.ui = malloc(sizeof(layer));
+    l.ui = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.ui->batch = batch;
     if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
 
-    l.ug = malloc(sizeof(layer));
+    l.ug = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.ug->batch = batch;
     if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
 
-    l.uo = malloc(sizeof(layer));
+    l.uo = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
     l.uo->batch = batch;
     if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
 
-    l.wf = malloc(sizeof(layer));
+    l.wf = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.wf->batch = batch;
     if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
 
-    l.wi = malloc(sizeof(layer));
+    l.wi = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.wi->batch = batch;
     if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
 
-    l.wg = malloc(sizeof(layer));
+    l.wg = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.wg->batch = batch;
     if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
 
-    l.wo = malloc(sizeof(layer));
+    l.wo = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
     l.wo->batch = batch;
@@ -90,27 +90,27 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
     l.batch_normalize = batch_normalize;
     l.outputs = outputs;
 
-    l.output = calloc(outputs*batch*steps, sizeof(float));
-    l.state = calloc(outputs*batch, sizeof(float));
+    l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
+    l.state = (float*)calloc(outputs * batch, sizeof(float));
 
     l.forward = forward_lstm_layer;
     l.update = update_lstm_layer;
 
-    l.prev_state_cpu =  calloc(batch*outputs, sizeof(float));
-    l.prev_cell_cpu =   calloc(batch*outputs, sizeof(float));
-    l.cell_cpu =        calloc(batch*outputs*steps, sizeof(float));
+    l.prev_state_cpu =  (float*)calloc(batch*outputs, sizeof(float));
+    l.prev_cell_cpu =   (float*)calloc(batch*outputs, sizeof(float));
+    l.cell_cpu =        (float*)calloc(batch*outputs*steps, sizeof(float));
 
-    l.f_cpu =           calloc(batch*outputs, sizeof(float));
-    l.i_cpu =           calloc(batch*outputs, sizeof(float));
-    l.g_cpu =           calloc(batch*outputs, sizeof(float));
-    l.o_cpu =           calloc(batch*outputs, sizeof(float));
-    l.c_cpu =           calloc(batch*outputs, sizeof(float));
-    l.h_cpu =           calloc(batch*outputs, sizeof(float));
-    l.temp_cpu =        calloc(batch*outputs, sizeof(float));
-    l.temp2_cpu =       calloc(batch*outputs, sizeof(float));
-    l.temp3_cpu =       calloc(batch*outputs, sizeof(float));
-    l.dc_cpu =          calloc(batch*outputs, sizeof(float));
-    l.dh_cpu =          calloc(batch*outputs, sizeof(float));
+    l.f_cpu =           (float*)calloc(batch*outputs, sizeof(float));
+    l.i_cpu =           (float*)calloc(batch*outputs, sizeof(float));
+    l.g_cpu =           (float*)calloc(batch*outputs, sizeof(float));
+    l.o_cpu =           (float*)calloc(batch*outputs, sizeof(float));
+    l.c_cpu =           (float*)calloc(batch*outputs, sizeof(float));
+    l.h_cpu =           (float*)calloc(batch*outputs, sizeof(float));
+    l.temp_cpu =        (float*)calloc(batch*outputs, sizeof(float));
+    l.temp2_cpu =       (float*)calloc(batch*outputs, sizeof(float));
+    l.temp3_cpu =       (float*)calloc(batch*outputs, sizeof(float));
+    l.dc_cpu =          (float*)calloc(batch*outputs, sizeof(float));
+    l.dh_cpu =          (float*)calloc(batch*outputs, sizeof(float));
 
 #ifdef GPU
     l.forward_gpu = forward_lstm_layer_gpu;
diff --git a/src/lstm_layer.h b/src/lstm_layer.h
index ad37c96f..d951ca37 100644
--- a/src/lstm_layer.h
+++ b/src/lstm_layer.h
@@ -6,7 +6,10 @@
 #include "network.h"
 #define USET
 
-layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
+#ifdef __cplusplus
+extern "C" {
+#endif
+LIB_API layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
 
 void forward_lstm_layer(layer l, network_state state); 
 void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay);
@@ -15,6 +18,9 @@ void update_lstm_layer(layer l, int batch, float learning_rate, float momentum,
 void forward_lstm_layer_gpu(layer l, network_state state);
 void backward_lstm_layer_gpu(layer l, network_state state);
 void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); 
+#endif
 
+#ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/src/matrix.c b/src/matrix.c
index 74300280..c7df22b6 100644
--- a/src/matrix.c
+++ b/src/matrix.c
@@ -15,7 +15,7 @@ void free_matrix(matrix m)
 
 float matrix_topk_accuracy(matrix truth, matrix guess, int k)
 {
-    int *indexes = calloc(k, sizeof(int));
+    int* indexes = (int*)calloc(k, sizeof(int));
     int n = truth.cols;
     int i,j;
     int correct = 0;
@@ -48,15 +48,15 @@ matrix resize_matrix(matrix m, int size)
     int i;
     if (m.rows == size) return m;
     if (m.rows < size) {
-        m.vals = realloc(m.vals, size*sizeof(float*));
+        m.vals = (float**)realloc(m.vals, size * sizeof(float*));
         for (i = m.rows; i < size; ++i) {
-            m.vals[i] = calloc(m.cols, sizeof(float));
+            m.vals[i] = (float*)calloc(m.cols, sizeof(float));
         }
     } else if (m.rows > size) {
         for (i = size; i < m.rows; ++i) {
             free(m.vals[i]);
         }
-        m.vals = realloc(m.vals, size*sizeof(float*));
+        m.vals = (float**)realloc(m.vals, size * sizeof(float*));
     }
     m.rows = size;
     return m;
@@ -79,9 +79,9 @@ matrix make_matrix(int rows, int cols)
     matrix m;
     m.rows = rows;
     m.cols = cols;
-    m.vals = calloc(m.rows, sizeof(float *));
+    m.vals = (float**)calloc(m.rows, sizeof(float*));
     for(i = 0; i < m.rows; ++i){
-        m.vals[i] = calloc(m.cols, sizeof(float));
+        m.vals[i] = (float*)calloc(m.cols, sizeof(float));
     }
     return m;
 }
@@ -92,7 +92,7 @@ matrix hold_out_matrix(matrix *m, int n)
     matrix h;
     h.rows = n;
     h.cols = m->cols;
-    h.vals = calloc(h.rows, sizeof(float *));
+    h.vals = (float**)calloc(h.rows, sizeof(float*));
     for(i = 0; i < n; ++i){
         int index = rand()%m->rows;
         h.vals[i] = m->vals[index];
@@ -103,7 +103,7 @@ matrix hold_out_matrix(matrix *m, int n)
 
 float *pop_column(matrix *m, int c)
 {
-    float *col = calloc(m->rows, sizeof(float));
+    float* col = (float*)calloc(m->rows, sizeof(float));
     int i, j;
     for(i = 0; i < m->rows; ++i){
         col[i] = m->vals[i][c];
@@ -127,18 +127,18 @@ matrix csv_to_matrix(char *filename)
 
     int n = 0;
     int size = 1024;
-    m.vals = calloc(size, sizeof(float*));
+    m.vals = (float**)calloc(size, sizeof(float*));
     while((line = fgetl(fp))){
         if(m.cols == -1) m.cols = count_fields(line);
         if(n == size){
             size *= 2;
-            m.vals = realloc(m.vals, size*sizeof(float*));
+            m.vals = (float**)realloc(m.vals, size * sizeof(float*));
         }
         m.vals[n] = parse_fields(line, m.cols);
         free(line);
         ++n;
     }
-    m.vals = realloc(m.vals, n*sizeof(float*));
+    m.vals = (float**)realloc(m.vals, n * sizeof(float*));
     m.rows = n;
     return m;
 }
@@ -225,7 +225,7 @@ void kmeans_maximization(matrix data, int *assignments, matrix centers)
     matrix old_centers = make_matrix(centers.rows, centers.cols);
 
     int i, j;
-    int *counts = calloc(centers.rows, sizeof(int));
+    int *counts = (int*)calloc(centers.rows, sizeof(int));
     for (i = 0; i < centers.rows; ++i) {
         for (j = 0; j < centers.cols; ++j) {
             old_centers.vals[i][j] = centers.vals[i][j];
@@ -268,7 +268,7 @@ void random_centers(matrix data, matrix centers) {
 int *sample(int n)
 {
     int i;
-    int *s = calloc(n, sizeof(int));
+    int* s = (int*)calloc(n, sizeof(int));
     for (i = 0; i < n; ++i) s[i] = i;
     for (i = n - 1; i >= 0; --i) {
         int swap = s[i];
@@ -301,7 +301,7 @@ void copy(float *x, float *y, int n)
 model do_kmeans(matrix data, int k)
 {
     matrix centers = make_matrix(k, data.cols);
-    int *assignments = calloc(data.rows, sizeof(int));
+    int* assignments = (int*)calloc(data.rows, sizeof(int));
     //smart_centers(data, centers);
     random_centers(data, centers);  // IoU = 67.31% after kmeans
 
diff --git a/src/matrix.h b/src/matrix.h
index a56ae8e0..d565722c 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -12,6 +12,9 @@ typedef struct {
     matrix centers;
 } model;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 model do_kmeans(matrix data, int k);
 matrix make_matrix(int rows, int cols);
@@ -28,4 +31,7 @@ matrix resize_matrix(matrix m, int size);
 
 float *pop_column(matrix *m, int c);
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c
index 2eaa7dec..7d3f7974 100644
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@@ -47,7 +47,7 @@ void cudnn_maxpool_setup(layer *l)
 
 maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
 {
-    maxpool_layer l = {0};
+    maxpool_layer l = { (LAYER_TYPE)0 };
     l.type = MAXPOOL;
     l.batch = batch;
     l.h = h;
@@ -62,9 +62,9 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
     l.size = size;
     l.stride = stride;
     int output_size = l.out_h * l.out_w * l.out_c * batch;
-    l.indexes = calloc(output_size, sizeof(int));
-    l.output =  calloc(output_size, sizeof(float));
-    l.delta =   calloc(output_size, sizeof(float));
+    l.indexes = (int*)calloc(output_size, sizeof(int));
+    l.output = (float*)calloc(output_size, sizeof(float));
+    l.delta = (float*)calloc(output_size, sizeof(float));
     l.forward = forward_maxpool_layer;
     l.backward = backward_maxpool_layer;
     #ifdef GPU
@@ -93,9 +93,9 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
     l->outputs = l->out_w * l->out_h * l->c;
     int output_size = l->outputs * l->batch;
 
-    l->indexes = realloc(l->indexes, output_size * sizeof(int));
-    l->output = realloc(l->output, output_size * sizeof(float));
-    l->delta = realloc(l->delta, output_size * sizeof(float));
+    l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int));
+    l->output = (float*)realloc(l->output, output_size * sizeof(float));
+    l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
 
 #ifdef GPU
     CHECK_CUDA(cudaFree((float *)l->indexes_gpu));
diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h
index 3a60cfe9..ba6a2020 100644
--- a/src/maxpool_layer.h
+++ b/src/maxpool_layer.h
@@ -8,6 +8,9 @@
 
 typedef layer maxpool_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 image get_maxpool_image(maxpool_layer l);
 maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
 void resize_maxpool_layer(maxpool_layer *l, int w, int h);
@@ -20,5 +23,8 @@ void backward_maxpool_layer_gpu(maxpool_layer l, network_state state);
 void cudnn_maxpool_setup(maxpool_layer *l);
 #endif // GPU
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu
index 577864e5..ec207a28 100644
--- a/src/maxpool_layer_kernels.cu
+++ b/src/maxpool_layer_kernels.cu
@@ -2,10 +2,8 @@
 #include "curand.h"
 #include "cublas_v2.h"
 
-extern "C" {
 #include "maxpool_layer.h"
 #include "cuda.h"
-}
 
 __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
 {
diff --git a/src/network.c b/src/network.c
index 68dd3e9a..762ddb67 100644
--- a/src/network.c
+++ b/src/network.c
@@ -180,16 +180,16 @@ network make_network(int n)
 {
     network net = {0};
     net.n = n;
-    net.layers = calloc(net.n, sizeof(layer));
-    net.seen = calloc(1, sizeof(uint64_t));
+    net.layers = (layer*)calloc(net.n, sizeof(layer));
+    net.seen = (uint64_t*)calloc(1, sizeof(uint64_t));
 #ifdef GPU
-    net.input_gpu = calloc(1, sizeof(float *));
-    net.truth_gpu = calloc(1, sizeof(float *));
+    net.input_gpu = (float**)calloc(1, sizeof(float*));
+    net.truth_gpu = (float**)calloc(1, sizeof(float*));
 
-    net.input16_gpu = calloc(1, sizeof(float *));
-    net.output16_gpu = calloc(1, sizeof(float *));
-    net.max_input16_size = calloc(1, sizeof(size_t));
-    net.max_output16_size = calloc(1, sizeof(size_t));
+    net.input16_gpu = (float**)calloc(1, sizeof(float*));
+    net.output16_gpu = (float**)calloc(1, sizeof(float*));
+    net.max_input16_size = (size_t*)calloc(1, sizeof(size_t));
+    net.max_output16_size = (size_t*)calloc(1, sizeof(size_t));
 #endif
     return net;
 }
@@ -300,8 +300,8 @@ float train_network_datum(network net, float *x, float *y)
 float train_network_sgd(network net, data d, int n)
 {
     int batch = net.batch;
-    float *X = calloc(batch*d.X.cols, sizeof(float));
-    float *y = calloc(batch*d.y.cols, sizeof(float));
+    float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
+    float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
 
     int i;
     float sum = 0;
@@ -320,8 +320,8 @@ float train_network(network net, data d)
     assert(d.X.rows % net.batch == 0);
     int batch = net.batch;
     int n = d.X.rows / batch;
-    float *X = calloc(batch*d.X.cols, sizeof(float));
-    float *y = calloc(batch*d.y.cols, sizeof(float));
+    float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
+    float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
 
     int i;
     float sum = 0;
@@ -389,11 +389,11 @@ int recalculate_workspace_size(network *net)
     }
     else {
         free(net->workspace);
-        net->workspace = calloc(1, workspace_size);
+        net->workspace = (float*)calloc(1, workspace_size);
     }
 #else
     free(net->workspace);
-    net->workspace = calloc(1, workspace_size);
+    net->workspace = (float*)calloc(1, workspace_size);
 #endif
     //fprintf(stderr, " Done!\n");
     return 0;
@@ -495,19 +495,19 @@ int resize_network(network *net, int w, int h)
             net->input_pinned_cpu_flag = 1;
         else {
             cudaGetLastError(); // reset CUDA-error
-            net->input_pinned_cpu = calloc(size, sizeof(float));
+            net->input_pinned_cpu = (float*)calloc(size, sizeof(float));
             net->input_pinned_cpu_flag = 0;
         }
         printf(" CUDA allocate done! \n");
     }else {
         free(net->workspace);
-        net->workspace = calloc(1, workspace_size);
+        net->workspace = (float*)calloc(1, workspace_size);
         if(!net->input_pinned_cpu_flag)
-            net->input_pinned_cpu = realloc(net->input_pinned_cpu, size * sizeof(float));
+            net->input_pinned_cpu = (float*)realloc(net->input_pinned_cpu, size * sizeof(float));
     }
 #else
     free(net->workspace);
-    net->workspace = calloc(1, workspace_size);
+    net->workspace = (float*)calloc(1, workspace_size);
 #endif
     //fprintf(stderr, " Done!\n");
     return 0;
@@ -534,7 +534,7 @@ detection_layer get_network_detection_layer(network net)
         }
     }
     fprintf(stderr, "Detection layer not found!!\n");
-    detection_layer l = {0};
+    detection_layer l = { (LAYER_TYPE)0 };
     return l;
 }
 
@@ -632,11 +632,11 @@ detection *make_network_boxes(network *net, float thresh, int *num)
     int i;
     int nboxes = num_detections(net, thresh);
     if (num) *num = nboxes;
-    detection *dets = calloc(nboxes, sizeof(detection));
+    detection* dets = (detection*)calloc(nboxes, sizeof(detection));
     for (i = 0; i < nboxes; ++i) {
-        dets[i].prob = calloc(l.classes, sizeof(float));
+        dets[i].prob = (float*)calloc(l.classes, sizeof(float));
         if (l.coords > 4) {
-            dets[i].mask = calloc(l.coords - 4, sizeof(float));
+            dets[i].mask = (float*)calloc(l.coords - 4, sizeof(float));
         }
     }
     return dets;
@@ -645,10 +645,10 @@ detection *make_network_boxes(network *net, float thresh, int *num)
 
 void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter)
 {
-    box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
-    float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
+    box* boxes = (box*)calloc(l.w * l.h * l.n, sizeof(box));
+    float** probs = (float**)calloc(l.w * l.h * l.n, sizeof(float*));
     int i, j;
-    for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float));
+    for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
     get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map);
     for (j = 0; j < l.w*l.h*l.n; ++j) {
         dets[j].classes = l.classes;
@@ -789,7 +789,7 @@ matrix network_predict_data_multi(network net, data test, int n)
     int i,j,b,m;
     int k = get_network_output_size(net);
     matrix pred = make_matrix(test.X.rows, k);
-    float *X = calloc(net.batch*test.X.rows, sizeof(float));
+    float* X = (float*)calloc(net.batch * test.X.rows, sizeof(float));
     for(i = 0; i < test.X.rows; i += net.batch){
         for(b = 0; b < net.batch; ++b){
             if(i+b == test.X.rows) break;
@@ -814,7 +814,7 @@ matrix network_predict_data(network net, data test)
     int i,j,b;
     int k = get_network_output_size(net);
     matrix pred = make_matrix(test.X.rows, k);
-    float *X = calloc(net.batch*test.X.cols, sizeof(float));
+    float* X = (float*)calloc(net.batch * test.X.cols, sizeof(float));
     for(i = 0; i < test.X.rows; i += net.batch){
         for(b = 0; b < net.batch; ++b){
             if(i+b == test.X.rows) break;
diff --git a/src/network.h b/src/network.h
index 10bd99f2..d082e7fc 100644
--- a/src/network.h
+++ b/src/network.h
@@ -6,14 +6,14 @@
 #include <stdint.h>
 #include "layer.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
 
 #include "image.h"
 #include "data.h"
 #include "tree.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 /*
 typedef enum {
     CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index cb6c0f38..38c15387 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -3,7 +3,6 @@
 //#include "cublas_v2.h"
 #include "cuda.h"
 
-extern "C" {
 #include <stdio.h>
 #include <time.h>
 #include <assert.h>
@@ -35,10 +34,9 @@ extern "C" {
 #include "route_layer.h"
 #include "shortcut_layer.h"
 #include "blas.h"
-}
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 #include "http_stream.h"
@@ -396,9 +394,11 @@ void sync_nets(network *nets, int n, int interval)
 float train_networks(network *nets, int n, data d, int interval)
 {
     int i;
+#ifdef _DEBUG
     int batch = nets[0].batch;
     int subdivisions = nets[0].subdivisions;
     assert(batch * subdivisions * n == d.X.rows);
+#endif
     pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
     float *errors = (float *) calloc(n, sizeof(float));
 
diff --git a/src/nightmare.c b/src/nightmare.c
index ec7166cc..433c93b7 100644
--- a/src/nightmare.c
+++ b/src/nightmare.c
@@ -5,7 +5,7 @@
 #include "utils.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 // ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2
diff --git a/src/normalization_layer.c b/src/normalization_layer.c
index 069a0792..9c01f789 100644
--- a/src/normalization_layer.c
+++ b/src/normalization_layer.c
@@ -5,7 +5,7 @@
 layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
 {
     fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
-    layer layer = {0};
+    layer layer = { (LAYER_TYPE)0 };
     layer.type = NORMALIZATION;
     layer.batch = batch;
     layer.h = layer.out_h = h;
@@ -15,10 +15,10 @@ layer make_normalization_layer(int batch, int w, int h, int c, int size, float a
     layer.size = size;
     layer.alpha = alpha;
     layer.beta = beta;
-    layer.output = calloc(h * w * c * batch, sizeof(float));
-    layer.delta = calloc(h * w * c * batch, sizeof(float));
-    layer.squared = calloc(h * w * c * batch, sizeof(float));
-    layer.norms = calloc(h * w * c * batch, sizeof(float));
+    layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
+    layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
+    layer.squared = (float*)calloc(h * w * c * batch, sizeof(float));
+    layer.norms = (float*)calloc(h * w * c * batch, sizeof(float));
     layer.inputs = w*h*c;
     layer.outputs = layer.inputs;
 
@@ -46,10 +46,10 @@ void resize_normalization_layer(layer *layer, int w, int h)
     layer->out_w = w;
     layer->inputs = w*h*c;
     layer->outputs = layer->inputs;
-    layer->output = realloc(layer->output, h * w * c * batch * sizeof(float));
-    layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float));
-    layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float));
-    layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float));
+    layer->output = (float*)realloc(layer->output, h * w * c * batch * sizeof(float));
+    layer->delta = (float*)realloc(layer->delta, h * w * c * batch * sizeof(float));
+    layer->squared = (float*)realloc(layer->squared, h * w * c * batch * sizeof(float));
+    layer->norms = (float*)realloc(layer->norms, h * w * c * batch * sizeof(float));
 #ifdef GPU
     cuda_free(layer->output_gpu);
     cuda_free(layer->delta_gpu); 
diff --git a/src/normalization_layer.h b/src/normalization_layer.h
index ab327764..b7b3d81a 100644
--- a/src/normalization_layer.h
+++ b/src/normalization_layer.h
@@ -5,6 +5,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
 void resize_normalization_layer(layer *layer, int h, int w);
 void forward_normalization_layer(const layer layer, network_state state);
@@ -16,4 +19,7 @@ void forward_normalization_layer_gpu(const layer layer, network_state state);
 void backward_normalization_layer_gpu(const layer layer, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/option_list.c b/src/option_list.c
index dfab81b0..22bc8c1b 100644
--- a/src/option_list.c
+++ b/src/option_list.c
@@ -3,6 +3,7 @@
 #include <string.h>
 #include "option_list.h"
 #include "utils.h"
+#include "data.h"
 
 list *read_data_cfg(char *filename)
 {
@@ -71,7 +72,7 @@ int read_option(char *s, list *options)
 
 void option_insert(list *l, char *key, char *val)
 {
-    kvp *p = malloc(sizeof(kvp));
+    kvp* p = (kvp*)malloc(sizeof(kvp));
     p->key = key;
     p->val = val;
     p->used = 0;
diff --git a/src/option_list.h b/src/option_list.h
index cb860671..a9e3e0b7 100644
--- a/src/option_list.h
+++ b/src/option_list.h
@@ -9,6 +9,9 @@ typedef struct{
     int used;
 } kvp;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 list *read_data_cfg(char *filename);
 int read_option(char *s, list *options);
@@ -28,4 +31,7 @@ void option_unused(list *l);
 
 //LIB_API metadata get_metadata(char *file);
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/parser.c b/src/parser.c
index df671008..ff344f01 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -272,7 +272,7 @@ int *parse_yolo_mask(char *a, int *num)
         for (i = 0; i < len; ++i) {
             if (a[i] == ',') ++n;
         }
-        mask = calloc(n, sizeof(int));
+        mask = (int*)calloc(n, sizeof(int));
         for (i = 0; i < n; ++i) {
             int val = atoi(a);
             mask[i] = val;
@@ -587,8 +587,8 @@ route_layer parse_route(list *options, size_params params, network net)
         if (l[i] == ',') ++n;
     }
 
-    int *layers = calloc(n, sizeof(int));
-    int *sizes = calloc(n, sizeof(int));
+    int* layers = (int*)calloc(n, sizeof(int));
+    int* sizes = (int*)calloc(n, sizeof(int));
     for(i = 0; i < n; ++i){
         int index = atoi(l);
         l = strchr(l, ',')+1;
@@ -693,8 +693,8 @@ void parse_net_options(list *options, network *net)
         for(i = 0; i < len; ++i){
             if (l[i] == ',') ++n;
         }
-        int *steps = calloc(n, sizeof(int));
-        float *scales = calloc(n, sizeof(float));
+        int* steps = (int*)calloc(n, sizeof(int));
+        float* scales = (float*)calloc(n, sizeof(float));
         for(i = 0; i < n; ++i){
             int step    = atoi(l);
             float scale = atof(p);
@@ -765,7 +765,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         fprintf(stderr, "%4d ", count);
         s = (section *)n->val;
         options = s->options;
-        layer l = {0};
+        layer l = { (LAYER_TYPE)0 };
         LAYER_TYPE lt = string_to_layer_type(s->type);
         if(lt == CONVOLUTIONAL){
             l = parse_convolutional(options, params);
@@ -864,7 +864,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
         else {
             cudaGetLastError(); // reset CUDA-error
-            net.input_pinned_cpu = calloc(size, sizeof(float));
+            net.input_pinned_cpu = (float*)calloc(size, sizeof(float));
         }
 
         // pre-allocate memory for inference on Tensor Cores (fp16)
@@ -879,12 +879,12 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
             net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
         }
         else {
-            net.workspace = calloc(1, workspace_size);
+            net.workspace = (float*)calloc(1, workspace_size);
         }
     }
 #else
         if (workspace_size) {
-            net.workspace = calloc(1, workspace_size);
+            net.workspace = (float*)calloc(1, workspace_size);
         }
 #endif
 
@@ -911,7 +911,7 @@ list *read_cfg(char *filename)
         strip(line);
         switch(line[0]){
             case '[':
-                current = malloc(sizeof(section));
+                current = (section*)malloc(sizeof(section));
                 list_insert(sections, current);
                 current->options = make_list();
                 current->type = line;
@@ -1091,7 +1091,7 @@ void save_weights(network net, char *filename)
 
 void transpose_matrix(float *a, int rows, int cols)
 {
-    float *transpose = calloc(rows*cols, sizeof(float));
+    float* transpose = (float*)calloc(rows * cols, sizeof(float));
     int x, y;
     for(x = 0; x < rows; ++x){
         for(y = 0; y < cols; ++y){
@@ -1313,7 +1313,7 @@ void load_weights(network *net, char *filename)
 network *load_network_custom(char *cfg, char *weights, int clear, int batch)
 {
     printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
-    network *net = calloc(1, sizeof(network));
+    network* net = (network*)calloc(1, sizeof(network));
     *net = parse_network_cfg_custom(cfg, batch, 0);
     if (weights && weights[0] != 0) {
         load_weights(net, weights);
@@ -1326,7 +1326,7 @@ network *load_network_custom(char *cfg, char *weights, int clear, int batch)
 network *load_network(char *cfg, char *weights, int clear)
 {
     printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
-    network *net = calloc(1, sizeof(network));
+    network* net = (network*)calloc(1, sizeof(network));
     *net = parse_network_cfg(cfg);
     if (weights && weights[0] != 0) {
         load_weights(net, weights);
diff --git a/src/parser.h b/src/parser.h
index d8a72bea..5e0d3fd1 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -2,6 +2,9 @@
 #define PARSER_H
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 network parse_network_cfg(char *filename);
 network parse_network_cfg_custom(char *filename, int batch, int time_steps);
 void save_network(network net, char *filename);
@@ -11,4 +14,7 @@ void save_weights_double(network net, char *filename);
 void load_weights(network *net, char *filename);
 void load_weights_upto(network *net, char *filename, int cutoff);
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/region_layer.c b/src/region_layer.c
index 42730e09..f7f82088 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -9,11 +9,10 @@
 #include <string.h>
 #include <stdlib.h>
 
-#define DOABS 1
 
 region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes)
 {
-    region_layer l = {0};
+    region_layer l = { (LAYER_TYPE)0 };
     l.type = REGION;
 
     l.n = n;
@@ -22,15 +21,15 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int
     l.w = w;
     l.classes = classes;
     l.coords = coords;
-    l.cost = calloc(1, sizeof(float));
-    l.biases = calloc(n*2, sizeof(float));
-    l.bias_updates = calloc(n*2, sizeof(float));
+    l.cost = (float*)calloc(1, sizeof(float));
+    l.biases = (float*)calloc(n * 2, sizeof(float));
+    l.bias_updates = (float*)calloc(n * 2, sizeof(float));
     l.outputs = h*w*n*(classes + coords + 1);
     l.inputs = l.outputs;
     l.max_boxes = max_boxes;
     l.truths = max_boxes*(5);
-    l.delta = calloc(batch*l.outputs, sizeof(float));
-    l.output = calloc(batch*l.outputs, sizeof(float));
+    l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
+    l.output = (float*)calloc(batch * l.outputs, sizeof(float));
     int i;
     for(i = 0; i < n*2; ++i){
         l.biases[i] = .5;
@@ -61,8 +60,8 @@ void resize_region_layer(layer *l, int w, int h)
     l->outputs = h*w*l->n*(l->classes + l->coords + 1);
     l->inputs = l->outputs;
 
-    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
-    l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
+    l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
+    l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
 
 #ifdef GPU
     if (old_w < w || old_h < h) {
@@ -444,11 +443,11 @@ void forward_region_layer_gpu(const region_layer l, network_state state)
         softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5);
     }
 
-    float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
+    float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
     float *truth_cpu = 0;
     if(state.truth){
         int num_truth = l.batch*l.truths;
-        truth_cpu = calloc(num_truth, sizeof(float));
+        truth_cpu = (float*)calloc(num_truth, sizeof(float));
         cuda_pull_array(state.truth, truth_cpu, num_truth);
     }
     cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs);
diff --git a/src/region_layer.h b/src/region_layer.h
index daf68c97..2b40d80f 100644
--- a/src/region_layer.h
+++ b/src/region_layer.h
@@ -6,6 +6,9 @@
 
 typedef layer region_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords, int max_boxes);
 void forward_region_layer(const region_layer l, network_state state);
 void backward_region_layer(const region_layer l, network_state state);
@@ -20,4 +23,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state);
 void backward_region_layer_gpu(region_layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/reorg_layer.c b/src/reorg_layer.c
index c298b400..47c5efa4 100644
--- a/src/reorg_layer.c
+++ b/src/reorg_layer.c
@@ -6,7 +6,7 @@
 
 layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
 {
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.type = REORG;
     l.batch = batch;
     l.stride = stride;
@@ -27,8 +27,8 @@ layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = h*w*c;
     int output_size = l.out_h * l.out_w * l.out_c * batch;
-    l.output =  calloc(output_size, sizeof(float));
-    l.delta =   calloc(output_size, sizeof(float));
+    l.output = (float*)calloc(output_size, sizeof(float));
+    l.delta = (float*)calloc(output_size, sizeof(float));
 
     l.forward = forward_reorg_layer;
     l.backward = backward_reorg_layer;
@@ -64,8 +64,8 @@ void resize_reorg_layer(layer *l, int w, int h)
     l->inputs = l->outputs;
     int output_size = l->outputs * l->batch;
 
-    l->output = realloc(l->output, output_size * sizeof(float));
-    l->delta = realloc(l->delta, output_size * sizeof(float));
+    l->output = (float*)realloc(l->output, output_size * sizeof(float));
+    l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/reorg_layer.h b/src/reorg_layer.h
index 21c22cd8..b6ed379c 100644
--- a/src/reorg_layer.h
+++ b/src/reorg_layer.h
@@ -6,6 +6,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse);
 void resize_reorg_layer(layer *l, int w, int h);
 void forward_reorg_layer(const layer l, network_state state);
@@ -16,5 +19,8 @@ void forward_reorg_layer_gpu(layer l, network_state state);
 void backward_reorg_layer_gpu(layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/reorg_old_layer.c b/src/reorg_old_layer.c
index 30206d9c..defc034d 100644
--- a/src/reorg_old_layer.c
+++ b/src/reorg_old_layer.c
@@ -6,7 +6,7 @@
 
 layer make_reorg_old_layer(int batch, int w, int h, int c, int stride, int reverse)
 {
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.type = REORG_OLD;
     l.batch = batch;
     l.stride = stride;
@@ -27,8 +27,8 @@ layer make_reorg_old_layer(int batch, int w, int h, int c, int stride, int rever
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = h*w*c;
     int output_size = l.out_h * l.out_w * l.out_c * batch;
-    l.output =  calloc(output_size, sizeof(float));
-    l.delta =   calloc(output_size, sizeof(float));
+    l.output = (float*)calloc(output_size, sizeof(float));
+    l.delta = (float*)calloc(output_size, sizeof(float));
 
     l.forward = forward_reorg_old_layer;
     l.backward = backward_reorg_old_layer;
@@ -64,8 +64,8 @@ void resize_reorg_old_layer(layer *l, int w, int h)
     l->inputs = l->outputs;
     int output_size = l->outputs * l->batch;
 
-    l->output = realloc(l->output, output_size * sizeof(float));
-    l->delta = realloc(l->delta, output_size * sizeof(float));
+    l->output = (float*)realloc(l->output, output_size * sizeof(float));
+    l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/reorg_old_layer.h b/src/reorg_old_layer.h
index e1838202..b66769b5 100644
--- a/src/reorg_old_layer.h
+++ b/src/reorg_old_layer.h
@@ -6,6 +6,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_reorg_old_layer(int batch, int h, int w, int c, int stride, int reverse);
 void resize_reorg_old_layer(layer *l, int w, int h);
 void forward_reorg_old_layer(const layer l, network_state state);
@@ -16,5 +19,8 @@ void forward_reorg_old_layer_gpu(layer l, network_state state);
 void backward_reorg_old_layer_gpu(layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/rnn.c b/src/rnn.c
index 3fb634e4..93107585 100644
--- a/src/rnn.c
+++ b/src/rnn.c
@@ -5,7 +5,7 @@
 #include "parser.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 typedef struct {
@@ -18,20 +18,20 @@ int *read_tokenized_data(char *filename, size_t *read)
     size_t size = 512;
     size_t count = 0;
     FILE *fp = fopen(filename, "r");
-    int *d = calloc(size, sizeof(int));
+    int* d = (int*)calloc(size, sizeof(int));
     int n, one;
     one = fscanf(fp, "%d", &n);
     while(one == 1){
         ++count;
         if(count > size){
             size = size*2;
-            d = realloc(d, size*sizeof(int));
+            d = (int*)realloc(d, size * sizeof(int));
         }
         d[count-1] = n;
         one = fscanf(fp, "%d", &n);
     }
     fclose(fp);
-    d = realloc(d, count*sizeof(int));
+    d = (int*)realloc(d, count * sizeof(int));
     *read = count;
     return d;
 }
@@ -41,26 +41,26 @@ char **read_tokens(char *filename, size_t *read)
     size_t size = 512;
     size_t count = 0;
     FILE *fp = fopen(filename, "r");
-    char **d = calloc(size, sizeof(char *));
+    char** d = (char**)calloc(size, sizeof(char*));
     char *line;
     while((line=fgetl(fp)) != 0){
         ++count;
         if(count > size){
             size = size*2;
-            d = realloc(d, size*sizeof(char *));
+            d = (char**)realloc(d, size * sizeof(char*));
         }
         d[count-1] = line;
     }
     fclose(fp);
-    d = realloc(d, count*sizeof(char *));
+    d = (char**)realloc(d, count * sizeof(char*));
     *read = count;
     return d;
 }
 
 float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps)
 {
-    float *x = calloc(batch * steps * characters, sizeof(float));
-    float *y = calloc(batch * steps * characters, sizeof(float));
+    float* x = (float*)calloc(batch * steps * characters, sizeof(float));
+    float* y = (float*)calloc(batch * steps * characters, sizeof(float));
     int i,j;
     for(i = 0; i < batch; ++i){
         for(j = 0; j < steps; ++j){
@@ -85,8 +85,8 @@ float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size
 
 float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps)
 {
-    float *x = calloc(batch * steps * characters, sizeof(float));
-    float *y = calloc(batch * steps * characters, sizeof(float));
+    float* x = (float*)calloc(batch * steps * characters, sizeof(float));
+    float* y = (float*)calloc(batch * steps * characters, sizeof(float));
     int i,j;
     for(i = 0; i < batch; ++i){
         for(j = 0; j < steps; ++j){
@@ -141,12 +141,12 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
         size = ftell(fp);
         fseek(fp, 0, SEEK_SET);
 
-        text = calloc(size+1, sizeof(char));
+        text = (unsigned char *)calloc(size + 1, sizeof(char));
         fread(text, 1, size, fp);
         fclose(fp);
     }
 
-    char *backup_directory = "backup";
+    char* backup_directory = "backup/";
     char *base = basecfg(cfgfile);
     fprintf(stderr, "%s\n", base);
     float avg_loss = -1;
@@ -165,7 +165,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
     int streams = batch/steps;
     printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %d \n", batch, steps, streams, net.subdivisions, size);
     printf(" global_batch = %d \n", batch*net.subdivisions);
-    size_t *offsets = calloc(streams, sizeof(size_t));
+    size_t* offsets = (size_t*)calloc(streams, sizeof(size_t));
     int j;
     for(j = 0; j < streams; ++j){
         offsets[j] = rand_size_t()%size;
@@ -248,7 +248,7 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
     for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
     int c = 0;
     int len = strlen(seed);
-    float *input = calloc(inputs, sizeof(float));
+    float* input = (float*)calloc(inputs, sizeof(float));
 
     /*
        fill_cpu(inputs, 0, input, 1);
@@ -307,7 +307,7 @@ void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int r
     int i, j;
     for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
     int c = 0;
-    float *input = calloc(inputs, sizeof(float));
+    float* input = (float*)calloc(inputs, sizeof(float));
     float *out = 0;
 
     while((c = getc(stdin)) != EOF){
@@ -346,7 +346,7 @@ void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed)
     int words = 1;
     int c;
     int len = strlen(seed);
-    float *input = calloc(inputs, sizeof(float));
+    float* input = (float*)calloc(inputs, sizeof(float));
     int i;
     for(i = 0; i < len; ++i){
         c = seed[i];
@@ -398,7 +398,7 @@ void valid_char_rnn(char *cfgfile, char *weightfile, char *seed)
     int words = 1;
     int c;
     int len = strlen(seed);
-    float *input = calloc(inputs, sizeof(float));
+    float* input = (float*)calloc(inputs, sizeof(float));
     int i;
     for(i = 0; i < len; ++i){
         c = seed[i];
@@ -437,7 +437,7 @@ void vec_char_rnn(char *cfgfile, char *weightfile, char *seed)
 
     int c;
     int seed_len = strlen(seed);
-    float *input = calloc(inputs, sizeof(float));
+    float* input = (float*)calloc(inputs, sizeof(float));
     int i;
     char *line;
     while((line=fgetl(stdin)) != 0){
diff --git a/src/rnn_layer.c b/src/rnn_layer.c
index c5669adb..22aade09 100644
--- a/src/rnn_layer.c
+++ b/src/rnn_layer.c
@@ -30,7 +30,7 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
 {
     fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs);
     batch = batch / steps;
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.batch = batch;
     l.type = RNN;
     l.steps = steps;
@@ -40,21 +40,21 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
     l.out_h = 1;
     l.out_c = outputs;
 
-    l.state = calloc(batch*hidden*(steps+1), sizeof(float));
+    l.state = (float*)calloc(batch * hidden * (steps + 1), sizeof(float));
 
-    l.input_layer = malloc(sizeof(layer));
+    l.input_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.input_layer) = make_connected_layer(batch, steps, inputs, hidden, activation, batch_normalize);
     l.input_layer->batch = batch;
     if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
 
-    l.self_layer = malloc(sizeof(layer));
+    l.self_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.self_layer) = make_connected_layer(batch, steps, hidden, hidden, (log==2)?LOGGY:(log==1?LOGISTIC:activation), batch_normalize);
     l.self_layer->batch = batch;
     if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
 
-    l.output_layer = malloc(sizeof(layer));
+    l.output_layer = (layer*)malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
     *(l.output_layer) = make_connected_layer(batch, steps, hidden, outputs, activation, batch_normalize);
     l.output_layer->batch = batch;
diff --git a/src/rnn_layer.h b/src/rnn_layer.h
index bb9478b9..43b57d82 100644
--- a/src/rnn_layer.h
+++ b/src/rnn_layer.h
@@ -7,6 +7,9 @@
 #include "network.h"
 #define USET
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);
 
 void forward_rnn_layer(layer l, network_state state);
@@ -21,5 +24,8 @@ void push_rnn_layer(layer l);
 void pull_rnn_layer(layer l);
 #endif
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/rnn_vid.c b/src/rnn_vid.c
index b9ea8a35..c0fd3d36 100644
--- a/src/rnn_vid.c
+++ b/src/rnn_vid.c
@@ -5,10 +5,10 @@
 #include "blas.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/core/version.hpp"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
 image get_image_from_stream(CvCapture *cap);
 image ipl_to_image(IplImage* src);
@@ -28,10 +28,10 @@ float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int ste
     image out_im = get_network_image(net);
     int output_size = out_im.w*out_im.h*out_im.c;
     printf("%d %d %d\n", out_im.w, out_im.h, out_im.c);
-    float *feats = calloc(net.batch*batch*output_size, sizeof(float));
+    float* feats = (float*)calloc(net.batch * batch * output_size, sizeof(float));
     for(b = 0; b < batch; ++b){
         int input_size = net.w*net.h*net.c;
-        float *input = calloc(input_size*net.batch, sizeof(float));
+        float* input = (float*)calloc(input_size * net.batch, sizeof(float));
         char *filename = files[rand()%n];
         CvCapture *cap = cvCaptureFromFile(filename);
         int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT);
@@ -80,7 +80,7 @@ float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int ste
 void train_vid_rnn(char *cfgfile, char *weightfile)
 {
     char *train_videos = "data/vid/train.txt";
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     srand(time(0));
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
@@ -101,7 +101,7 @@ void train_vid_rnn(char *cfgfile, char *weightfile)
     int batch = net.batch / net.time_steps;
 
     network extractor = parse_network_cfg("cfg/extractor.cfg");
-    load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv");
+    load_weights(&extractor, "trained/yolo-coco.conv");
 
     while(get_current_batch(net) < net.max_batches){
         i += 1;
@@ -154,7 +154,7 @@ image save_reconstruction(network net, image *init, float *feat, char *name, int
 void generate_vid_rnn(char *cfgfile, char *weightfile)
 {
     network extractor = parse_network_cfg("cfg/extractor.recon.cfg");
-    load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv");
+    load_weights(&extractor, "trained/yolo-coco.conv");
 
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
@@ -164,7 +164,7 @@ void generate_vid_rnn(char *cfgfile, char *weightfile)
     set_batch_network(&net, 1);
 
     int i;
-    CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4");
+    CvCapture* cap = cvCaptureFromFile("extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4");
     float *feat;
     float *next;
 	next = NULL;
@@ -191,9 +191,9 @@ void generate_vid_rnn(char *cfgfile, char *weightfile)
     }
     for(i = 0; i < 30; ++i){
         next = network_predict(net, next);
-        image new = save_reconstruction(extractor, &last, next, "new", i);
+        image newimage = save_reconstruction(extractor, &last, next, "newimage", i);
         free_image(last);
-        last = new;
+        last = newimage;
     }
 }
 
diff --git a/src/route_layer.c b/src/route_layer.c
index db1ac206..8bd6817a 100644
--- a/src/route_layer.c
+++ b/src/route_layer.c
@@ -6,7 +6,7 @@
 route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes)
 {
     fprintf(stderr,"route ");
-    route_layer l = {0};
+    route_layer l = { (LAYER_TYPE)0 };
     l.type = ROUTE;
     l.batch = batch;
     l.n = n;
@@ -21,8 +21,8 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz
     fprintf(stderr, "\n");
     l.outputs = outputs;
     l.inputs = outputs;
-    l.delta =  calloc(outputs*batch, sizeof(float));
-    l.output = calloc(outputs*batch, sizeof(float));;
+    l.delta = (float*)calloc(outputs * batch, sizeof(float));
+    l.output = (float*)calloc(outputs * batch, sizeof(float));
 
     l.forward = forward_route_layer;
     l.backward = backward_route_layer;
@@ -58,8 +58,8 @@ void resize_route_layer(route_layer *l, network *net)
         }
     }
     l->inputs = l->outputs;
-    l->delta =  realloc(l->delta, l->outputs*l->batch*sizeof(float));
-    l->output = realloc(l->output, l->outputs*l->batch*sizeof(float));
+    l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
+    l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/route_layer.h b/src/route_layer.h
index 45467d95..3ee90193 100644
--- a/src/route_layer.h
+++ b/src/route_layer.h
@@ -5,6 +5,9 @@
 
 typedef layer route_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size);
 void forward_route_layer(const route_layer l, network_state state);
 void backward_route_layer(const route_layer l, network_state state);
@@ -15,4 +18,7 @@ void forward_route_layer_gpu(const route_layer l, network_state state);
 void backward_route_layer_gpu(const route_layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c
index 263ee1f4..b84b72f2 100644
--- a/src/shortcut_layer.c
+++ b/src/shortcut_layer.c
@@ -7,7 +7,7 @@
 layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2)
 {
     fprintf(stderr,"Shortcut Layer: %d\n", index);
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.type = SHORTCUT;
     l.batch = batch;
     l.w = w2;
@@ -21,8 +21,8 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
 
     l.index = index;
 
-    l.delta =  calloc(l.outputs*batch, sizeof(float));
-    l.output = calloc(l.outputs*batch, sizeof(float));;
+    l.delta = (float*)calloc(l.outputs * batch, sizeof(float));
+    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
 
     l.forward = forward_shortcut_layer;
     l.backward = backward_shortcut_layer;
@@ -44,8 +44,8 @@ void resize_shortcut_layer(layer *l, int w, int h)
     l->h = l->out_h = h;
     l->outputs = w*h*l->out_c;
     l->inputs = l->outputs;
-    l->delta = realloc(l->delta, l->outputs*l->batch * sizeof(float));
-    l->output = realloc(l->output, l->outputs*l->batch * sizeof(float));
+    l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
+    l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/shortcut_layer.h b/src/shortcut_layer.h
index 912d72e0..b24aa3e6 100644
--- a/src/shortcut_layer.h
+++ b/src/shortcut_layer.h
@@ -4,6 +4,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2);
 void forward_shortcut_layer(const layer l, network_state state);
 void backward_shortcut_layer(const layer l, network_state state);
@@ -14,4 +17,7 @@ void forward_shortcut_layer_gpu(const layer l, network_state state);
 void backward_shortcut_layer_gpu(const layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index bfe34bc1..3efe0157 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -30,16 +30,16 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups)
 {
     assert(inputs%groups == 0);
     fprintf(stderr, "softmax                                        %4d\n",  inputs);
-    softmax_layer l = {0};
+    softmax_layer l = { (LAYER_TYPE)0 };
     l.type = SOFTMAX;
     l.batch = batch;
     l.groups = groups;
     l.inputs = inputs;
     l.outputs = inputs;
-    l.loss = calloc(inputs*batch, sizeof(float));
-    l.output = calloc(inputs*batch, sizeof(float));
-    l.delta = calloc(inputs*batch, sizeof(float));
-    l.cost = calloc(1, sizeof(float));
+    l.loss = (float*)calloc(inputs * batch, sizeof(float));
+    l.output = (float*)calloc(inputs * batch, sizeof(float));
+    l.delta = (float*)calloc(inputs * batch, sizeof(float));
+    l.cost = (float*)calloc(1, sizeof(float));
 
     l.forward = forward_softmax_layer;
     l.backward = backward_softmax_layer;
diff --git a/src/softmax_layer.h b/src/softmax_layer.h
index 821a8dd7..55e622c1 100644
--- a/src/softmax_layer.h
+++ b/src/softmax_layer.h
@@ -5,6 +5,9 @@
 
 typedef layer softmax_layer;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 void softmax_array(float *input, int n, float temp, float *output);
 softmax_layer make_softmax_layer(int batch, int inputs, int groups);
 void forward_softmax_layer(const softmax_layer l, network_state state);
@@ -16,4 +19,7 @@ void forward_softmax_layer_gpu(const softmax_layer l, network_state state);
 void backward_softmax_layer_gpu(const softmax_layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/super.c b/src/super.c
index 63e9860a..0f96fddb 100644
--- a/src/super.c
+++ b/src/super.c
@@ -4,13 +4,13 @@
 #include "parser.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 void train_super(char *cfgfile, char *weightfile)
 {
-    char *train_images = "/data/imagenet/imagenet1k.train.list";
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* train_images = "data/imagenet/imagenet1k.train.list";
+    char* backup_directory = "backup/";
     srand(time(0));
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
diff --git a/src/swag.c b/src/swag.c
index 2cb3093b..d5020007 100644
--- a/src/swag.c
+++ b/src/swag.c
@@ -6,13 +6,13 @@
 #include "box.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 void train_swag(char *cfgfile, char *weightfile)
 {
     char *train_images = "data/voc.0712.trainval";
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     srand(time(0));
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
diff --git a/src/tag.c b/src/tag.c
index 1e43e7d9..f6cbc0fb 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -3,7 +3,7 @@
 #include "parser.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 void train_tag(char *cfgfile, char *weightfile, int clear)
@@ -11,7 +11,7 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
     srand(time(0));
     float avg_loss = -1;
     char *base = basecfg(cfgfile);
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     printf("%s\n", base);
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
@@ -20,7 +20,7 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
     if(clear) *net.seen = 0;
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 1024;
-    list *plist = get_paths("/home/pjreddie/tag/train.list");
+    list* plist = get_paths("tag/train.list");
     char **paths = (char **)list_to_array(plist);
     printf("%d\n", plist->size);
     int N = plist->size;
diff --git a/src/tree.c b/src/tree.c
index d66da9f8..0b320e3b 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -93,42 +93,42 @@ tree *read_tree(char *filename)
     int groups = 0;
     int n = 0;
     while((line=fgetl(fp)) != 0){
-        char *id = calloc(256, sizeof(char));
+        char* id = (char*)calloc(256, sizeof(char));
         int parent = -1;
         sscanf(line, "%s %d", id, &parent);
-        t.parent = realloc(t.parent, (n+1)*sizeof(int));
+        t.parent = (int*)realloc(t.parent, (n + 1) * sizeof(int));
         t.parent[n] = parent;
 
-        t.name = realloc(t.name, (n+1)*sizeof(char *));
+        t.name = (char**)realloc(t.name, (n + 1) * sizeof(char*));
         t.name[n] = id;
         if(parent != last_parent){
             ++groups;
-            t.group_offset = realloc(t.group_offset, groups * sizeof(int));
+            t.group_offset = (int*)realloc(t.group_offset, groups * sizeof(int));
             t.group_offset[groups - 1] = n - group_size;
-            t.group_size = realloc(t.group_size, groups * sizeof(int));
+            t.group_size = (int*)realloc(t.group_size, groups * sizeof(int));
             t.group_size[groups - 1] = group_size;
             group_size = 0;
             last_parent = parent;
         }
-        t.group = realloc(t.group, (n+1)*sizeof(int));
+        t.group = (int*)realloc(t.group, (n + 1) * sizeof(int));
         t.group[n] = groups;
         ++n;
         ++group_size;
     }
     ++groups;
-    t.group_offset = realloc(t.group_offset, groups * sizeof(int));
+    t.group_offset = (int*)realloc(t.group_offset, groups * sizeof(int));
     t.group_offset[groups - 1] = n - group_size;
-    t.group_size = realloc(t.group_size, groups * sizeof(int));
+    t.group_size = (int*)realloc(t.group_size, groups * sizeof(int));
     t.group_size[groups - 1] = group_size;
     t.n = n;
     t.groups = groups;
-    t.leaf = calloc(n, sizeof(int));
+    t.leaf = (int*)calloc(n, sizeof(int));
     int i;
     for(i = 0; i < n; ++i) t.leaf[i] = 1;
     for(i = 0; i < n; ++i) if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0;
 
     fclose(fp);
-    tree *tree_ptr = calloc(1, sizeof(tree));
+    tree* tree_ptr = (tree*)calloc(1, sizeof(tree));
     *tree_ptr = t;
     //error(0);
     return tree_ptr;
diff --git a/src/tree.h b/src/tree.h
index b44d3f4a..86379738 100644
--- a/src/tree.h
+++ b/src/tree.h
@@ -15,10 +15,16 @@
 //    int *group_offset;
 //} tree;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 //tree *read_tree(char *filename);
 int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride);
 void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves);
 void change_leaves(tree *t, char *leaf_list);
 float get_hierarchy_probability(float *x, tree *hier, int c);
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/upsample_layer.c b/src/upsample_layer.c
index 1aa5a150..dafc1b8d 100644
--- a/src/upsample_layer.c
+++ b/src/upsample_layer.c
@@ -6,7 +6,7 @@
 
 layer make_upsample_layer(int batch, int w, int h, int c, int stride)
 {
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.type = UPSAMPLE;
     l.batch = batch;
     l.w = w;
@@ -24,8 +24,8 @@ layer make_upsample_layer(int batch, int w, int h, int c, int stride)
     l.stride = stride;
     l.outputs = l.out_w*l.out_h*l.out_c;
     l.inputs = l.w*l.h*l.c;
-    l.delta =  calloc(l.outputs*batch, sizeof(float));
-    l.output = calloc(l.outputs*batch, sizeof(float));;
+    l.delta = (float*)calloc(l.outputs * batch, sizeof(float));
+    l.output = (float*)calloc(l.outputs * batch, sizeof(float));
 
     l.forward = forward_upsample_layer;
     l.backward = backward_upsample_layer;
@@ -53,8 +53,8 @@ void resize_upsample_layer(layer *l, int w, int h)
     }
     l->outputs = l->out_w*l->out_h*l->out_c;
     l->inputs = l->h*l->w*l->c;
-    l->delta =  realloc(l->delta, l->outputs*l->batch*sizeof(float));
-    l->output = realloc(l->output, l->outputs*l->batch*sizeof(float));
+    l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
+    l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
 
 #ifdef GPU
     cuda_free(l->output_gpu);
diff --git a/src/upsample_layer.h b/src/upsample_layer.h
index aea9434d..68aff329 100644
--- a/src/upsample_layer.h
+++ b/src/upsample_layer.h
@@ -4,6 +4,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_upsample_layer(int batch, int w, int h, int c, int stride);
 void forward_upsample_layer(const layer l, network_state state);
 void backward_upsample_layer(const layer l, network_state state);
@@ -14,4 +17,7 @@ void forward_upsample_layer_gpu(const layer l, network_state state);
 void backward_upsample_layer_gpu(const layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/utils.c b/src/utils.c
index d20d4fb3..6614f1a8 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,3 +1,4 @@
+#include "utils.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -5,16 +6,13 @@
 #include <assert.h>
 #include <float.h>
 #include <limits.h>
+#include "darkunistd.h"
 #ifdef WIN32
-#include "unistd.h"
 #include "gettimeofday.h"
 #else
-#include <unistd.h>
 #include <sys/time.h>
 #endif
-#include "utils.h"
 
-#pragma warning(disable: 4996)
 
 double what_time_is_it_now()
 {
@@ -34,7 +32,7 @@ int *read_map(char *filename)
     if(!file) file_error(filename);
     while((str=fgetl(file))){
         ++n;
-        map = realloc(map, n*sizeof(int));
+        map = (int*)realloc(map, n * sizeof(int));
         map[n-1] = atoi(str);
     }
     return map;
@@ -54,7 +52,7 @@ void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections)
 void shuffle(void *arr, size_t n, size_t size)
 {
     size_t i;
-    void *swp = calloc(1, size);
+    void* swp = (void*)calloc(1, size);
     for(i = 0; i < n-1; ++i){
         size_t j = i + rand()/(RAND_MAX / (n-i)+1);
         memcpy(swp,            (char*)arr+(j*size), size);
@@ -167,9 +165,9 @@ void pm(int M, int N, float *A)
     printf("\n");
 }
 
-void find_replace(char *str, char *orig, char *rep, char *output)
+void find_replace(const char* str, char* orig, char* rep, char* output)
 {
-    char *buffer = calloc(8192, sizeof(char));
+    char* buffer = (char*)calloc(8192, sizeof(char));
     char *p;
 
     sprintf(buffer, "%s", str);
@@ -187,7 +185,7 @@ void find_replace(char *str, char *orig, char *rep, char *output)
 
 void trim(char *str)
 {
-    char *buffer = calloc(8192, sizeof(char));
+    char* buffer = (char*)calloc(8192, sizeof(char));
     sprintf(buffer, "%s", str);
 
     char *p = buffer;
@@ -205,7 +203,7 @@ void trim(char *str)
 
 void find_replace_extension(char *str, char *orig, char *rep, char *output)
 {
-    char *buffer = calloc(8192, sizeof(char));
+    char* buffer = (char*)calloc(8192, sizeof(char));
 
     sprintf(buffer, "%s", str);
     char *p = strstr(buffer, orig);
@@ -222,7 +220,7 @@ void find_replace_extension(char *str, char *orig, char *rep, char *output)
     free(buffer);
 }
 
-void replace_image_to_label(char *input_path, char *output_path)
+void replace_image_to_label(const char* input_path, char* output_path)
 {
     find_replace(input_path, "/images/train2014/", "/labels/train2014/", output_path);    // COCO
     find_replace(output_path, "/images/val2014/", "/labels/val2014/", output_path);        // COCO
@@ -356,7 +354,7 @@ char *fgetl(FILE *fp)
 {
     if(feof(fp)) return 0;
     size_t size = 512;
-    char *line = malloc(size*sizeof(char));
+    char* line = (char*)malloc(size * sizeof(char));
     if(!fgets(line, size, fp)){
         free(line);
         return 0;
@@ -367,7 +365,7 @@ char *fgetl(FILE *fp)
     while((line[curr-1] != '\n') && !feof(fp)){
         if(curr == size-1){
             size *= 2;
-            line = realloc(line, size*sizeof(char));
+            line = (char*)realloc(line, size * sizeof(char));
             if(!line) {
                 printf("%ld\n", size);
                 malloc_error();
@@ -446,7 +444,7 @@ void write_all(int fd, char *buffer, size_t bytes)
 
 char *copy_string(char *s)
 {
-    char *copy = malloc(strlen(s)+1);
+    char* copy = (char*)malloc(strlen(s) + 1);
     strncpy(copy, s, strlen(s)+1);
     return copy;
 }
@@ -482,7 +480,7 @@ int count_fields(char *line)
 
 float *parse_fields(char *line, int n)
 {
-    float *field = calloc(n, sizeof(float));
+    float* field = (float*)calloc(n, sizeof(float));
     char *c, *p, *end;
     int count = 0;
     int done = 0;
@@ -656,8 +654,8 @@ int max_index(float *a, int n)
 
 int top_max_index(float *a, int n, int k)
 {
-    float *values = calloc(k, sizeof(float));
-    int *indexes = calloc(k, sizeof(int));
+    float *values = (float*)calloc(k, sizeof(float));
+    int *indexes = (int*)calloc(k, sizeof(int));
     if (n <= 0) return -1;
     int i, j;
     for (i = 0; i < n; ++i) {
@@ -716,7 +714,7 @@ float rand_normal()
     rand1 = rand() / ((double) RAND_MAX);
     if(rand1 < 1e-100) rand1 = 1e-100;
     rand1 = -2 * log(rand1);
-    rand2 = (rand() / ((double) RAND_MAX)) * TWO_PI;
+    rand2 = (rand() / ((double)RAND_MAX)) * 2.0 * M_PI;
 
     return sqrt(rand1) * cos(rand2);
 }
@@ -765,9 +763,9 @@ float rand_scale(float s)
 float **one_hot_encode(float *a, int n, int k)
 {
     int i;
-    float **t = calloc(n, sizeof(float*));
+    float** t = (float**)calloc(n, sizeof(float*));
     for(i = 0; i < n; ++i){
-        t[i] = calloc(k, sizeof(float));
+        t[i] = (float*)calloc(k, sizeof(float));
         int index = (int)a[i];
         t[i][index] = 1;
     }
@@ -802,4 +800,4 @@ float rand_uniform_strong(float min, float max)
         max = swap;
     }
     return (random_float() * (max - min)) + min;
-}
\ No newline at end of file
+}
diff --git a/src/utils.h b/src/utils.h
index 8ef31263..f00ed2d5 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -4,25 +4,9 @@
 #include <time.h>
 #include "list.h"
 
-#if defined(_MSC_VER) && _MSC_VER < 1900
-	#define snprintf(buf,len, format,...) _snprintf_s(buf, len,len, format, __VA_ARGS__)
-#endif
-
-#define SECRET_NUM -1234
-#define TWO_PI 6.2831853071795864769252866
-
-#ifdef LIB_EXPORTS
-#if defined(_MSC_VER)
-#define LIB_API __declspec(dllexport)
-#else
-#define LIB_API __attribute__((visibility("default")))
-#endif
-#else
-#if defined(_MSC_VER)
-#define LIB_API
-#else
-#define LIB_API
-#endif
+#include "darknet.h"
+#ifdef __cplusplus
+extern "C" {
 #endif
 
 LIB_API void free_ptrs(void **ptrs, int n);
@@ -41,8 +25,8 @@ void read_all(int fd, char *buffer, size_t bytes);
 void write_all(int fd, char *buffer, size_t bytes);
 int read_all_fail(int fd, char *buffer, size_t bytes);
 int write_all_fail(int fd, char *buffer, size_t bytes);
-void find_replace(char *str, char *orig, char *rep, char *output);
-void replace_image_to_label(char *input_path, char *output_path);
+LIB_API void find_replace(const char* str, char* orig, char* rep, char* output);
+void replace_image_to_label(const char* input_path, char* output_path);
 void error(const char *s);
 void malloc_error();
 void file_error(char *s);
@@ -88,5 +72,8 @@ float random_float();
 float rand_uniform_strong(float min, float max);
 int int_index(int *a, int val, int n);
 
+#ifdef __cplusplus
+}
 #endif
 
+#endif
diff --git a/src/voxel.c b/src/voxel.c
index 476663ff..57fb17b6 100644
--- a/src/voxel.c
+++ b/src/voxel.c
@@ -4,10 +4,10 @@
 #include "parser.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/core/version.hpp"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
 image get_image_from_stream(CvCapture *cap);
 #endif
@@ -50,8 +50,8 @@ void extract_voxel(char *lfile, char *rfile, char *prefix)
 
 void train_voxel(char *cfgfile, char *weightfile)
 {
-    char *train_images = "/data/imagenet/imagenet1k.train.list";
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* train_images = "data/imagenet/imagenet1k.train.list";
+    char* backup_directory = "backup/";
     srand(time(0));
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
diff --git a/src/writing.c b/src/writing.c
index 0a76d48e..b6c49075 100644
--- a/src/writing.c
+++ b/src/writing.c
@@ -3,12 +3,12 @@
 #include "parser.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
+#include <opencv2/highgui/highgui_c.h>
 #endif
 
 void train_writing(char *cfgfile, char *weightfile)
 {
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* backup_directory = "backup/";
     srand(time(0));
     float avg_loss = -1;
     char *base = basecfg(cfgfile);
diff --git a/src/yolo.c b/src/yolo.c
index 56eb814e..fc7d8a6e 100644
--- a/src/yolo.c
+++ b/src/yolo.c
@@ -7,11 +7,11 @@
 #include "demo.h"
 
 #ifdef OPENCV
-#include "opencv2/highgui/highgui_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
-#include "opencv2/core/version.hpp"
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/imgproc/imgproc_c.h>
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio_c.h"
+#include <opencv2/videoio/videoio_c.h>
 #endif
 #endif
 
@@ -19,8 +19,8 @@ char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "c
 
 void train_yolo(char *cfgfile, char *weightfile)
 {
-    char *train_images = "/data/voc/train.txt";
-    char *backup_directory = "/home/pjreddie/backup/";
+    char* train_images = "data/voc/train.txt";
+    char* backup_directory = "backup/";
     srand(time(0));
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
@@ -125,7 +125,7 @@ void validate_yolo(char *cfgfile, char *weightfile)
 
     char *base = "results/comp4_det_test_";
     //list *plist = get_paths("data/voc.2007.test");
-    list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt");
+    list* plist = get_paths("data/voc/2007_test.txt");
     //list *plist = get_paths("data/voc.2012.test");
     char **paths = (char **)list_to_array(plist);
 
@@ -133,15 +133,15 @@ void validate_yolo(char *cfgfile, char *weightfile)
     int classes = l.classes;
 
     int j;
-    FILE **fps = calloc(classes, sizeof(FILE *));
+    FILE** fps = (FILE**)calloc(classes, sizeof(FILE*));
     for(j = 0; j < classes; ++j){
         char buff[1024];
         snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
         fps[j] = fopen(buff, "w");
     }
-    box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
-    float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
-    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
+    box* boxes = (box*)calloc(l.side * l.side * l.n, sizeof(box));
+    float** probs = (float**)calloc(l.side * l.side * l.n, sizeof(float*));
+    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
 
     int m = plist->size;
     int i=0;
@@ -152,11 +152,11 @@ void validate_yolo(char *cfgfile, char *weightfile)
     float iou_thresh = .5;
 
     int nthreads = 8;
-    image *val = calloc(nthreads, sizeof(image));
-    image *val_resized = calloc(nthreads, sizeof(image));
-    image *buf = calloc(nthreads, sizeof(image));
-    image *buf_resized = calloc(nthreads, sizeof(image));
-    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+    image* val = (image*)calloc(nthreads, sizeof(image));
+    image* val_resized = (image*)calloc(nthreads, sizeof(image));
+    image* buf = (image*)calloc(nthreads, sizeof(image));
+    image* buf_resized = (image*)calloc(nthreads, sizeof(image));
+    pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
 
     load_args args = {0};
     args.w = net.w;
@@ -220,15 +220,15 @@ void validate_yolo_recall(char *cfgfile, char *weightfile)
     int side = l.side;
 
     int j, k;
-    FILE **fps = calloc(classes, sizeof(FILE *));
+    FILE** fps = (FILE**)calloc(classes, sizeof(FILE*));
     for(j = 0; j < classes; ++j){
         char buff[1024];
         snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
         fps[j] = fopen(buff, "w");
     }
-    box *boxes = calloc(side*side*l.n, sizeof(box));
-    float **probs = calloc(side*side*l.n, sizeof(float *));
-    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
+    box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
+    float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
+    for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
 
     int m = plist->size;
     int i=0;
@@ -299,9 +299,9 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
     char *input = buff;
     int j;
     float nms=.4;
-    box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
-    float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
-    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
+    box* boxes = (box*)calloc(l.side * l.side * l.n, sizeof(box));
+    float** probs = (float**)calloc(l.side * l.side * l.n, sizeof(float*));
+    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
     while(1){
         if(filename){
             strncpy(input, filename, 256);
diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp
index a724b3e5..c4554fba 100644
--- a/src/yolo_console_dll.cpp
+++ b/src/yolo_console_dll.cpp
@@ -1,5 +1,5 @@
 #include <iostream>
-#include <iomanip> 
+#include <iomanip>
 #include <string>
 #include <vector>
 #include <queue>
@@ -9,10 +9,6 @@
 #include <mutex>              // std::mutex, std::unique_lock
 #include <condition_variable> // std::condition_variable
 
-#ifdef _WIN32
-#define OPENCV
-#define GPU
-#endif
 
 // To use tracking - uncomment the following line. Tracking is supported only by OpenCV 3.x
 //#define TRACK_OPTFLOW
@@ -25,10 +21,11 @@
 
 #ifdef OPENCV
 #include <opencv2/opencv.hpp>            // C++
-#include "opencv2/core/version.hpp"
+#include <opencv2/core/version.hpp>
 #ifndef CV_VERSION_EPOCH
-#include "opencv2/videoio/videoio.hpp"
+#include <opencv2/videoio/videoio.hpp>
 #define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION)
+#ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib")
 #ifdef TRACK_OPTFLOW
 #pragma comment(lib, "opencv_cudaoptflow" OPENCV_VERSION ".lib")
@@ -37,11 +34,14 @@
 #pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib")
 #pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib")
 #endif    // TRACK_OPTFLOW
+#endif    // USE_CMAKE_LIBS
 #else
 #define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)""CVAUX_STR(CV_VERSION_MAJOR)""CVAUX_STR(CV_VERSION_MINOR)
+#ifndef USE_CMAKE_LIBS
 #pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib")
 #pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib")
 #pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib")
+#endif    // USE_CMAKE_LIBS
 #endif    // CV_VERSION_EPOCH
 
 class track_kalman {
@@ -146,10 +146,10 @@ public:
                     //else std::cout << "shakin \n";
 
                     if (dx_vec[index] > 1000 || dy_vec[index] > 1000) {
-                        //std::cout << "!!! bad dx or dy, dx = " << dx_vec[index] << ", dy = " << dy_vec[index] << 
+                        //std::cout << "!!! bad dx or dy, dx = " << dx_vec[index] << ", dy = " << dy_vec[index] <<
                         //    ", delta_time = " << delta_time << ", update = " << update << std::endl;
                         dx_vec[index] = 0;
-                        dy_vec[index] = 0;                        
+                        dy_vec[index] = 0;
                     }
                     old_result_vec[k].x = new_result_vec[i].x;
                     old_result_vec[k].y = new_result_vec[i].y;
@@ -178,7 +178,7 @@ public:
 };
 
 
-void draw_boxes(cv::Mat mat_img, std::vector<bbox_t> result_vec, std::vector<std::string> obj_names, 
+void draw_boxes(cv::Mat mat_img, std::vector<bbox_t> result_vec, std::vector<std::string> obj_names,
     int current_det_fps = -1, int current_cap_fps = -1)
 {
     int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } };
@@ -191,8 +191,8 @@ void draw_boxes(cv::Mat mat_img, std::vector<bbox_t> result_vec, std::vector<std
             if (i.track_id > 0) obj_name += " - " + std::to_string(i.track_id);
             cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0);
             int const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2);
-            cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 30, 0)), 
-                cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)), 
+            cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 30, 0)),
+                cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)),
                 color, CV_FILLED, 8, 0);
             putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2);
         }
@@ -208,7 +208,7 @@ void draw_boxes(cv::Mat mat_img, std::vector<bbox_t> result_vec, std::vector<std
 void show_console_result(std::vector<bbox_t> const result_vec, std::vector<std::string> const obj_names) {
     for (auto &i : result_vec) {
         if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - ";
-        std::cout << "obj_id = " << i.obj_id << ",  x = " << i.x << ", y = " << i.y 
+        std::cout << "obj_id = " << i.obj_id << ",  x = " << i.x << ", y = " << i.y
             << ", w = " << i.w << ", h = " << i.h
             << std::setprecision(3) << ", prob = " << i.prob << std::endl;
     }
@@ -231,7 +231,7 @@ int main(int argc, char *argv[])
     std::string  weights_file = "yolov3.weights";
     std::string filename;
 
-    if (argc > 4) {    //voc.names yolo-voc.cfg yolo-voc.weights test.mp4        
+    if (argc > 4) {    //voc.names yolo-voc.cfg yolo-voc.weights test.mp4
         names_file = argv[1];
         cfg_file = argv[2];
         weights_file = argv[3];
@@ -251,12 +251,12 @@ int main(int argc, char *argv[])
     detector.wait_stream = true;
 #endif
 
-    while (true) 
-    {        
+    while (true)
+    {
         std::cout << "input image or video filename: ";
         if(filename.size() == 0) std::cin >> filename;
         if (filename.size() == 0) break;
-        
+
         try {
 #ifdef OPENCV
             extrapolate_coords_t extrapolate_coords;
@@ -295,7 +295,7 @@ int main(int argc, char *argv[])
                 if (save_output_videofile)
                     output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true);
 
-                while (!cur_frame.empty()) 
+                while (!cur_frame.empty())
                 {
                     // always sync
                     if (t_cap.joinable()) {
@@ -337,7 +337,7 @@ int main(int argc, char *argv[])
                             extrapolate_coords.update_result(result_vec, cur_time_extrapolate - 1);
                         }
 #else
-                        result_vec = detector.tracking_id(result_vec);    // comment it - if track_id is not required                    
+                        result_vec = detector.tracking_id(result_vec);    // comment it - if track_id is not required
                         extrapolate_coords.new_result(result_vec, cur_time_extrapolate - 1);
 #endif
                         // add old tracked objects
@@ -366,7 +366,7 @@ int main(int argc, char *argv[])
                             auto current_image = det_image;
                             consumed = true;
                             while (current_image.use_count() > 0 && !exit_flag) {
-                                auto result = detector.detect_resized(*current_image, frame_size.width, frame_size.height, 
+                                auto result = detector.detect_resized(*current_image, frame_size.width, frame_size.height,
                                     thresh, false);    // true
                                 ++fps_det_counter;
                                 std::unique_lock<std::mutex> lock(mtx);
@@ -399,7 +399,7 @@ int main(int argc, char *argv[])
                         result_vec = tracker_flow.tracking_flow(cur_frame);    // track optical flow
                         extrapolate_coords.update_result(result_vec, cur_time_extrapolate);
                         small_preview.draw(cur_frame, show_small_boxes);
-#endif                        
+#endif
                         auto result_vec_draw = result_vec;
                         if (extrapolate_flag) {
                             result_vec_draw = extrapolate_coords.predict(cur_time_extrapolate);
@@ -420,7 +420,7 @@ int main(int argc, char *argv[])
                             if (t_videowrite.joinable()) t_videowrite.join();
                             write_frame = cur_frame.clone();
                             videowrite_ready = false;
-                            t_videowrite = std::thread([&]() { 
+                            t_videowrite = std::thread([&]() {
                                  output_video << write_frame; videowrite_ready = true;
                             });
                         }
@@ -444,7 +444,7 @@ int main(int argc, char *argv[])
             else if (file_ext == "txt") {    // list of image files
                 std::ifstream file(filename);
                 if (!file.is_open()) std::cout << "File not found! \n";
-                else 
+                else
                     for (std::string line; file >> line;) {
                         std::cout << line << std::endl;
                         cv::Mat mat_img = cv::imread(line);
@@ -453,11 +453,11 @@ int main(int argc, char *argv[])
                         //draw_boxes(mat_img, result_vec, obj_names);
                         //cv::imwrite("res_" + line, mat_img);
                     }
-                
+
             }
             else {    // image file
                 cv::Mat mat_img = cv::imread(filename);
-                
+
                 auto start = std::chrono::steady_clock::now();
                 std::vector<bbox_t> result_vec = detector.detect(mat_img);
                 auto end = std::chrono::steady_clock::now();
@@ -477,7 +477,7 @@ int main(int argc, char *argv[])
             std::vector<bbox_t> result_vec = detector.detect(img);
             detector.free_image(img);
             show_console_result(result_vec, obj_names);
-#endif            
+#endif
         }
         catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); }
         catch (...) { std::cerr << "unknown exception \n"; getchar(); }
@@ -485,4 +485,4 @@ int main(int argc, char *argv[])
     }
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index 05739b4b..916391d1 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -13,7 +13,7 @@
 layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes)
 {
     int i;
-    layer l = {0};
+    layer l = { (LAYER_TYPE)0 };
     l.type = YOLO;
 
     l.n = n;
@@ -26,22 +26,22 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int
     l.out_h = l.h;
     l.out_c = l.c;
     l.classes = classes;
-    l.cost = calloc(1, sizeof(float));
-    l.biases = calloc(total*2, sizeof(float));
+    l.cost = (float*)calloc(1, sizeof(float));
+    l.biases = (float*)calloc(total * 2, sizeof(float));
     if(mask) l.mask = mask;
     else{
-        l.mask = calloc(n, sizeof(int));
+        l.mask = (int*)calloc(n, sizeof(int));
         for(i = 0; i < n; ++i){
             l.mask[i] = i;
         }
     }
-    l.bias_updates = calloc(n*2, sizeof(float));
+    l.bias_updates = (float*)calloc(n * 2, sizeof(float));
     l.outputs = h*w*n*(classes + 4 + 1);
     l.inputs = l.outputs;
     l.max_boxes = max_boxes;
     l.truths = l.max_boxes*(4 + 1);    // 90*(4 + 1);
-    l.delta = calloc(batch*l.outputs, sizeof(float));
-    l.output = calloc(batch*l.outputs, sizeof(float));
+    l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
+    l.output = (float*)calloc(batch * l.outputs, sizeof(float));
     for(i = 0; i < total*2; ++i){
         l.biases[i] = .5;
     }
@@ -58,14 +58,14 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int
     if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1;
     else {
         cudaGetLastError(); // reset CUDA-error
-        l.output = calloc(batch*l.outputs, sizeof(float));
+        l.output = (float*)calloc(batch * l.outputs, sizeof(float));
     }
 
     free(l.delta);
     if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1;
     else {
         cudaGetLastError(); // reset CUDA-error
-        l.delta = calloc(batch*l.outputs, sizeof(float));
+        l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
     }
 #endif
 
@@ -83,15 +83,15 @@ void resize_yolo_layer(layer *l, int w, int h)
     l->outputs = h*w*l->n*(l->classes + 4 + 1);
     l->inputs = l->outputs;
 
-    if (!l->output_pinned) l->output = realloc(l->output, l->batch*l->outputs * sizeof(float));
-    if (!l->delta_pinned) l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
+    if (!l->output_pinned) l->output = (float*)realloc(l->output, l->batch*l->outputs * sizeof(float));
+    if (!l->delta_pinned) l->delta = (float*)realloc(l->delta, l->batch*l->outputs*sizeof(float));
 
 #ifdef GPU
     if (l->output_pinned) {
         cudaFreeHost(l->output);
         if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) {
             cudaGetLastError(); // reset CUDA-error
-            l->output = realloc(l->output, l->batch*l->outputs * sizeof(float));
+            l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
             l->output_pinned = 0;
         }
     }
@@ -100,7 +100,7 @@ void resize_yolo_layer(layer *l, int w, int h)
         cudaFreeHost(l->delta);
         if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) {
             cudaGetLastError(); // reset CUDA-error
-            l->delta = realloc(l->delta, l->batch*l->outputs * sizeof(float));
+            l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
             l->delta_pinned = 0;
         }
     }
@@ -458,13 +458,13 @@ void forward_yolo_layer_gpu(const layer l, network_state state)
         return;
     }
 
-    float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
+    float *in_cpu = (float *)calloc(l.batch*l.inputs, sizeof(float));
     cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
     memcpy(in_cpu, l.output, l.batch*l.outputs*sizeof(float));
     float *truth_cpu = 0;
     if (state.truth) {
         int num_truth = l.batch*l.truths;
-        truth_cpu = calloc(num_truth, sizeof(float));
+        truth_cpu = (float *)calloc(num_truth, sizeof(float));
         cuda_pull_array(state.truth, truth_cpu, num_truth);
     }
     network_state cpu_state = state;
diff --git a/src/yolo_layer.h b/src/yolo_layer.h
index 0c2f5016..d67482fe 100644
--- a/src/yolo_layer.h
+++ b/src/yolo_layer.h
@@ -5,6 +5,9 @@
 #include "layer.h"
 #include "network.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes);
 void forward_yolo_layer(const layer l, network_state state);
 void backward_yolo_layer(const layer l, network_state state);
@@ -18,4 +21,7 @@ void forward_yolo_layer_gpu(const layer l, network_state state);
 void backward_yolo_layer_gpu(layer l, network_state state);
 #endif
 
+#ifdef __cplusplus
+}
+#endif
 #endif
diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp
index c5ea1dfb..668ef5cd 100644
--- a/src/yolo_v2_class.cpp
+++ b/src/yolo_v2_class.cpp
@@ -20,7 +20,6 @@ extern "C" {
 #include <iostream>
 #include <algorithm>
 
-#define FRAMES 3
 
 //static Detector* detector = NULL;
 static std::unique_ptr<Detector> detector;
@@ -93,9 +92,9 @@ void check_cuda(cudaError_t status) {
 
 struct detector_gpu_t {
     network net;
-    image images[FRAMES];
+    image images[NFRAMES];
     float *avg;
-    float *predictions[FRAMES];
+    float* predictions[NFRAMES];
     int demo_index;
     unsigned int *track_id;
 };
@@ -135,8 +134,8 @@ LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename
     int j;
 
     detector_gpu.avg = (float *)calloc(l.outputs, sizeof(float));
-    for (j = 0; j < FRAMES; ++j) detector_gpu.predictions[j] = (float *)calloc(l.outputs, sizeof(float));
-    for (j = 0; j < FRAMES; ++j) detector_gpu.images[j] = make_image(1, 1, 3);
+    for (j = 0; j < NFRAMES; ++j) detector_gpu.predictions[j] = (float*)calloc(l.outputs, sizeof(float));
+    for (j = 0; j < NFRAMES; ++j) detector_gpu.images[j] = make_image(1, 1, 3);
 
     detector_gpu.track_id = (unsigned int *)calloc(l.classes, sizeof(unsigned int));
     for (j = 0; j < l.classes; ++j) detector_gpu.track_id[j] = 1;
@@ -155,8 +154,8 @@ LIB_API Detector::~Detector()
     free(detector_gpu.track_id);
 
     free(detector_gpu.avg);
-    for (int j = 0; j < FRAMES; ++j) free(detector_gpu.predictions[j]);
-    for (int j = 0; j < FRAMES; ++j) if(detector_gpu.images[j].data) free(detector_gpu.images[j].data);
+    for (int j = 0; j < NFRAMES; ++j) free(detector_gpu.predictions[j]);
+    for (int j = 0; j < NFRAMES; ++j) if (detector_gpu.images[j].data) free(detector_gpu.images[j].data);
 
     int old_gpu_index;
 #ifdef GPU
@@ -275,9 +274,9 @@ LIB_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool use
 
     if (use_mean) {
         memcpy(detector_gpu.predictions[detector_gpu.demo_index], prediction, l.outputs * sizeof(float));
-        mean_arrays(detector_gpu.predictions, FRAMES, l.outputs, detector_gpu.avg);
+        mean_arrays(detector_gpu.predictions, NFRAMES, l.outputs, detector_gpu.avg);
         l.output = detector_gpu.avg;
-        detector_gpu.demo_index = (detector_gpu.demo_index + 1) % FRAMES;
+        detector_gpu.demo_index = (detector_gpu.demo_index + 1) % NFRAMES;
     }
     //get_region_boxes(l, 1, 1, thresh, detector_gpu.probs, detector_gpu.boxes, 0, 0);
     //if (nms) do_nms_sort(detector_gpu.boxes, detector_gpu.probs, l.w*l.h*l.n, l.classes, nms);
@@ -378,4 +377,4 @@ LIB_API std::vector<bbox_t> Detector::tracking_id(std::vector<bbox_t> cur_bbox_v
     }
 
     return cur_bbox_vec;
-}
\ No newline at end of file
+}