improve compatibility with c++ compilers, prepare for CMake

This commit is contained in:
Stefano Sinigardi
2019-02-14 17:28:23 +01:00
parent 3d9c8530a0
commit b3579380dc
128 changed files with 1871 additions and 2258 deletions

5
.gitignore vendored
View File

@ -28,3 +28,8 @@ Thumbs.db
# CMake # # CMake #
cmake-build-debug/ cmake-build-debug/
CMakeLists.txt CMakeLists.txt
build_*/
build.*
cmake/
*.patch
.gitignore

View File

@ -26,6 +26,38 @@
#endif #endif
#endif #endif
#ifdef _WIN32
#define PORT unsigned long
#define ADDRPOINTER int*
#else
#define PORT unsigned short
#define SOCKET int
#define HOSTENT struct hostent
#define SOCKADDR struct sockaddr
#define SOCKADDR_IN struct sockaddr_in
#define ADDRPOINTER unsigned int*
#define INVALID_SOCKET -1
#define SOCKET_ERROR -1
#endif
#define FULL_MASK 0xffffffff
#define WARP_SIZE 32
#define BLOCK 512
#define NUMCHARS 37
#define NFRAMES 3
#define BLOCK_TRANSPOSE32 256
#define DOABS 1
#define SECRET_NUM -1234
#define C_SHARP_MAX_OBJECTS 1000
#define TILE_M 4 // 4 ops
#define TILE_N 16 // AVX2 = 2 ops * 8 floats
#define TILE_K 16 // loop
#ifndef __COMPAR_FN_T
#define __COMPAR_FN_T
typedef int (*__compar_fn_t)(const void*, const void*);
#ifdef __USE_GNU
typedef __compar_fn_t comparison_fn_t;
#endif
#endif
#ifdef GPU #ifdef GPU
#define BLOCK 512 #define BLOCK 512

View File

@ -1,17 +1,7 @@
#pragma once #ifndef YOLO_V2_CLASS_HPP
#ifdef LIB_EXPORTS #define YOLO_V2_CLASS_HPP
#if defined(_MSC_VER)
#define LIB_API __declspec(dllexport) #include "darknet.h"
#else
#define LIB_API __attribute__((visibility("default")))
#endif
#else
#if defined(_MSC_VER)
#define LIB_API
#else
#define LIB_API
#endif
#endif
struct bbox_t { struct bbox_t {
unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box
@ -28,7 +18,6 @@ struct image_t {
float *data; // pointer to the image data float *data; // pointer to the image data
}; };
#define C_SHARP_MAX_OBJECTS 1000
struct bbox_t_container { struct bbox_t_container {
bbox_t candidates[C_SHARP_MAX_OBJECTS]; bbox_t candidates[C_SHARP_MAX_OBJECTS];
}; };
@ -41,8 +30,8 @@ struct bbox_t_container {
#ifdef OPENCV #ifdef OPENCV
#include <opencv2/opencv.hpp> // C++ #include <opencv2/opencv.hpp> // C++
#include "opencv2/highgui/highgui_c.h" // C #include <opencv2/highgui/highgui_c.h> // C
#include "opencv2/imgproc/imgproc_c.h" // C #include <opencv2/imgproc/imgproc_c.h> // C
#endif // OPENCV #endif // OPENCV
extern "C" LIB_API int init(const char *configurationFilename, const char *weightsFilename, int gpu); extern "C" LIB_API int init(const char *configurationFilename, const char *weightsFilename, int gpu);
@ -658,3 +647,4 @@ void free_img(image_t m) {
#endif // __cplusplus #endif // __cplusplus
*/ */
#endif

View File

@ -3,10 +3,8 @@
#include "curand.h" #include "curand.h"
#include "cublas_v2.h" #include "cublas_v2.h"
extern "C" {
#include "activations.h" #include "activations.h"
#include "cuda.h" #include "cuda.h"
}
__device__ float lhtan_activate_kernel(float x) __device__ float lhtan_activate_kernel(float x)

View File

@ -11,15 +11,15 @@
layer make_activation_layer(int batch, int inputs, ACTIVATION activation) layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
{ {
layer l = {0}; layer l = { (LAYER_TYPE)0 };
l.type = ACTIVE; l.type = ACTIVE;
l.inputs = inputs; l.inputs = inputs;
l.outputs = inputs; l.outputs = inputs;
l.batch=batch; l.batch=batch;
l.output = calloc(batch*inputs, sizeof(float*)); l.output = (float*)calloc(batch * inputs, sizeof(float));
l.delta = calloc(batch*inputs, sizeof(float*)); l.delta = (float*)calloc(batch * inputs, sizeof(float));
l.forward = forward_activation_layer; l.forward = forward_activation_layer;
l.backward = backward_activation_layer; l.backward = backward_activation_layer;

View File

@ -5,6 +5,9 @@
#include "layer.h" #include "layer.h"
#include "network.h" #include "network.h"
#ifdef __cplusplus
extern "C" {
#endif
layer make_activation_layer(int batch, int inputs, ACTIVATION activation); layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
void forward_activation_layer(layer l, network_state state); void forward_activation_layer(layer l, network_state state);
@ -15,5 +18,8 @@ void forward_activation_layer_gpu(layer l, network_state state);
void backward_activation_layer_gpu(layer l, network_state state); void backward_activation_layer_gpu(layer l, network_state state);
#endif #endif
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -8,6 +8,9 @@
// LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU // LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
//}ACTIVATION; //}ACTIVATION;
#ifdef __cplusplus
extern "C" {
#endif
ACTIVATION get_activation(char *s); ACTIVATION get_activation(char *s);
char *get_activation_string(ACTIVATION a); char *get_activation_string(ACTIVATION a);
@ -87,5 +90,8 @@ static inline float leaky_gradient(float x){return (x>0) ? 1 : .1f;}
static inline float tanh_gradient(float x){return 1-x*x;} static inline float tanh_gradient(float x){return 1-x*x;}
static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01f : .125f;} static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01f : .125f;}
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -6,7 +6,6 @@
#include "classifier.h" #include "classifier.h"
#ifdef WIN32 #ifdef WIN32
#include <time.h> #include <time.h>
#include <winsock.h>
#include "gettimeofday.h" #include "gettimeofday.h"
#else #else
#include <sys/time.h> #include <sys/time.h>
@ -14,10 +13,10 @@
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#include "opencv2/core/version.hpp" #include <opencv2/core/version.hpp>
#ifndef CV_VERSION_EPOCH #ifndef CV_VERSION_EPOCH
#include "opencv2/videoio/videoio_c.h" #include <opencv2/videoio/videoio_c.h>
#endif #endif
image get_image_from_stream(CvCapture *cap); image get_image_from_stream(CvCapture *cap);
#endif #endif

View File

@ -5,7 +5,7 @@
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
{ {
fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c);
avgpool_layer l = {0}; avgpool_layer l = { (LAYER_TYPE)0 };
l.type = AVGPOOL; l.type = AVGPOOL;
l.batch = batch; l.batch = batch;
l.h = h; l.h = h;
@ -17,8 +17,8 @@ avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
l.outputs = l.out_c; l.outputs = l.out_c;
l.inputs = h*w*c; l.inputs = h*w*c;
int output_size = l.outputs * batch; int output_size = l.outputs * batch;
l.output = calloc(output_size, sizeof(float)); l.output = (float*)calloc(output_size, sizeof(float));
l.delta = calloc(output_size, sizeof(float)); l.delta = (float*)calloc(output_size, sizeof(float));
l.forward = forward_avgpool_layer; l.forward = forward_avgpool_layer;
l.backward = backward_avgpool_layer; l.backward = backward_avgpool_layer;
#ifdef GPU #ifdef GPU

View File

@ -8,6 +8,9 @@
typedef layer avgpool_layer; typedef layer avgpool_layer;
#ifdef __cplusplus
extern "C" {
#endif
image get_avgpool_image(avgpool_layer l); image get_avgpool_image(avgpool_layer l);
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); avgpool_layer make_avgpool_layer(int batch, int w, int h, int c);
void resize_avgpool_layer(avgpool_layer *l, int w, int h); void resize_avgpool_layer(avgpool_layer *l, int w, int h);
@ -19,5 +22,8 @@ void forward_avgpool_layer_gpu(avgpool_layer l, network_state state);
void backward_avgpool_layer_gpu(avgpool_layer l, network_state state); void backward_avgpool_layer_gpu(avgpool_layer l, network_state state);
#endif #endif
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -2,10 +2,8 @@
#include "curand.h" #include "curand.h"
#include "cublas_v2.h" #include "cublas_v2.h"
extern "C" {
#include "avgpool_layer.h" #include "avgpool_layer.h"
#include "cuda.h" #include "cuda.h"
}
__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
{ {

View File

@ -5,29 +5,29 @@
layer make_batchnorm_layer(int batch, int w, int h, int c) layer make_batchnorm_layer(int batch, int w, int h, int c)
{ {
fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
layer layer = {0}; layer layer = { (LAYER_TYPE)0 };
layer.type = BATCHNORM; layer.type = BATCHNORM;
layer.batch = batch; layer.batch = batch;
layer.h = layer.out_h = h; layer.h = layer.out_h = h;
layer.w = layer.out_w = w; layer.w = layer.out_w = w;
layer.c = layer.out_c = c; layer.c = layer.out_c = c;
layer.output = calloc(h * w * c * batch, sizeof(float)); layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
layer.delta = calloc(h * w * c * batch, sizeof(float)); layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
layer.inputs = w*h*c; layer.inputs = w*h*c;
layer.outputs = layer.inputs; layer.outputs = layer.inputs;
layer.scales = calloc(c, sizeof(float)); layer.scales = (float*)calloc(c, sizeof(float));
layer.scale_updates = calloc(c, sizeof(float)); layer.scale_updates = (float*)calloc(c, sizeof(float));
int i; int i;
for(i = 0; i < c; ++i){ for(i = 0; i < c; ++i){
layer.scales[i] = 1; layer.scales[i] = 1;
} }
layer.mean = calloc(c, sizeof(float)); layer.mean = (float*)calloc(c, sizeof(float));
layer.variance = calloc(c, sizeof(float)); layer.variance = (float*)calloc(c, sizeof(float));
layer.rolling_mean = calloc(c, sizeof(float)); layer.rolling_mean = (float*)calloc(c, sizeof(float));
layer.rolling_variance = calloc(c, sizeof(float)); layer.rolling_variance = (float*)calloc(c, sizeof(float));
layer.forward = forward_batchnorm_layer; layer.forward = forward_batchnorm_layer;
layer.backward = backward_batchnorm_layer; layer.backward = backward_batchnorm_layer;

View File

@ -5,6 +5,9 @@
#include "layer.h" #include "layer.h"
#include "network.h" #include "network.h"
#ifdef __cplusplus
extern "C" {
#endif
layer make_batchnorm_layer(int batch, int w, int h, int c); layer make_batchnorm_layer(int batch, int w, int h, int c);
void forward_batchnorm_layer(layer l, network_state state); void forward_batchnorm_layer(layer l, network_state state);
void backward_batchnorm_layer(layer l, network_state state); void backward_batchnorm_layer(layer l, network_state state);
@ -16,4 +19,7 @@ void pull_batchnorm_layer(layer l);
void push_batchnorm_layer(layer l); void push_batchnorm_layer(layer l);
#endif #endif
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -34,7 +34,7 @@ void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride,
void flatten(float *x, int size, int layers, int batch, int forward) void flatten(float *x, int size, int layers, int batch, int forward)
{ {
float *swap = calloc(size*layers*batch, sizeof(float)); float* swap = (float*)calloc(size * layers * batch, sizeof(float));
int i,c,b; int i,c,b;
for(b = 0; b < batch; ++b){ for(b = 0; b < batch; ++b){
for(c = 0; c < layers; ++c){ for(c = 0; c < layers; ++c){

View File

@ -1,5 +1,12 @@
#ifndef BLAS_H #ifndef BLAS_H
#define BLAS_H #define BLAS_H
#ifdef GPU
#include "cuda.h"
#include "tree.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
void flatten(float *x, int size, int layers, int batch, int forward); void flatten(float *x, int size, int layers, int batch, int forward);
void pm(int M, int N, float *A); void pm(int M, int N, float *A);
float *random_matrix(int rows, int cols); float *random_matrix(int rows, int cols);
@ -41,8 +48,6 @@ void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, i
void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
#ifdef GPU #ifdef GPU
#include "cuda.h"
#include "tree.h"
void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
@ -97,5 +102,8 @@ void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int for
void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier);
#endif
#ifdef __cplusplus
}
#endif #endif
#endif #endif

View File

@ -3,12 +3,10 @@
#include "cublas_v2.h" #include "cublas_v2.h"
#include <assert.h> #include <assert.h>
extern "C" {
#include "blas.h" #include "blas.h"
#include "cuda.h" #include "cuda.h"
#include "utils.h" #include "utils.h"
#include "tree.h" #include "tree.h"
}
__global__ void scale_bias_kernel(float *output, float *biases, int n, int size) __global__ void scale_bias_kernel(float *output, float *biases, int n, int size)
{ {

View File

@ -249,7 +249,7 @@ int nms_comparator(const void *pa, const void *pb)
void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh) void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh)
{ {
int i, j, k; int i, j, k;
sortable_bbox *s = calloc(total, sizeof(sortable_bbox)); sortable_bbox* s = (sortable_bbox*)calloc(total, sizeof(sortable_bbox));
for(i = 0; i < total; ++i){ for(i = 0; i < total; ++i){
s[i].index = i; s[i].index = i;

View File

@ -27,6 +27,9 @@ typedef struct detection_with_class {
int best_class; int best_class;
} detection_with_class; } detection_with_class;
#ifdef __cplusplus
extern "C" {
#endif
box float_to_box(float *f); box float_to_box(float *f);
float box_iou(box a, box b); float box_iou(box a, box b);
float box_rmse(box a, box b); float box_rmse(box a, box b);
@ -42,4 +45,7 @@ box encode_box(box b, box anchor);
// Return number of selected detections in *selected_detections_num // Return number of selected detections in *selected_detections_num
detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names); detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names);
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -41,11 +41,11 @@ void train_captcha(char *cfgfile, char *weightfile)
int i = *net.seen/imgs; int i = *net.seen/imgs;
int solved = 1; int solved = 1;
list *plist; list *plist;
char **labels = get_labels("/data/captcha/reimgs.labels.list"); char** labels = get_labels("data/captcha/reimgs.labels.list");
if (solved){ if (solved){
plist = get_paths("/data/captcha/reimgs.solved.list"); plist = get_paths("data/captcha/reimgs.solved.list");
}else{ }else{
plist = get_paths("/data/captcha/reimgs.raw.list"); plist = get_paths("data/captcha/reimgs.raw.list");
} }
char **paths = (char **)list_to_array(plist); char **paths = (char **)list_to_array(plist);
printf("%d\n", plist->size); printf("%d\n", plist->size);
@ -89,7 +89,7 @@ void train_captcha(char *cfgfile, char *weightfile)
free_data(train); free_data(train);
if(i%100==0){ if(i%100==0){
char buff[256]; char buff[256];
sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); sprintf(buff, "imagenet_backup/%s_%d.weights", base, i);
save_weights(net, buff); save_weights(net, buff);
} }
} }
@ -104,7 +104,7 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
set_batch_network(&net, 1); set_batch_network(&net, 1);
srand(2222222); srand(2222222);
int i = 0; int i = 0;
char **names = get_labels("/data/captcha/reimgs.labels.list"); char** names = get_labels("data/captcha/reimgs.labels.list");
char buff[256]; char buff[256];
char *input = buff; char *input = buff;
int indexes[26]; int indexes[26];
@ -137,12 +137,12 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
void valid_captcha(char *cfgfile, char *weightfile, char *filename) void valid_captcha(char *cfgfile, char *weightfile, char *filename)
{ {
char **labels = get_labels("/data/captcha/reimgs.labels.list"); char** labels = get_labels("data/captcha/reimgs.labels.list");
network net = parse_network_cfg(cfgfile); network net = parse_network_cfg(cfgfile);
if(weightfile){ if(weightfile){
load_weights(&net, weightfile); load_weights(&net, weightfile);
} }
list *plist = get_paths("/data/captcha/reimgs.fg.list"); list* plist = get_paths("data/captcha/reimgs.fg.list");
char **paths = (char **)list_to_array(plist); char **paths = (char **)list_to_array(plist);
int N = plist->size; int N = plist->size;
int outputs = net.outputs; int outputs = net.outputs;

View File

@ -5,7 +5,7 @@
#include "blas.h" #include "blas.h"
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#endif #endif
void train_cifar(char *cfgfile, char *weightfile) void train_cifar(char *cfgfile, char *weightfile)
@ -20,7 +20,7 @@ void train_cifar(char *cfgfile, char *weightfile)
} }
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
char *backup_directory = "/home/pjreddie/backup/"; char* backup_directory = "backup/";
int classes = 10; int classes = 10;
int N = 50000; int N = 50000;
@ -68,7 +68,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile)
} }
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
char *backup_directory = "/home/pjreddie/backup/"; char* backup_directory = "backup/";
int classes = 10; int classes = 10;
int N = 50000; int N = 50000;

View File

@ -8,20 +8,18 @@
#include "cuda.h" #include "cuda.h"
#ifdef WIN32 #ifdef WIN32
#include <time.h> #include <time.h>
#include <winsock.h>
#include "gettimeofday.h" #include "gettimeofday.h"
#else #else
#include <sys/time.h> #include <sys/time.h>
#endif #endif
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#include "opencv2/core/version.hpp" #include <opencv2/core/version.hpp>
#ifndef CV_VERSION_EPOCH #ifndef CV_VERSION_EPOCH
#include "opencv2/videoio/videoio_c.h" #include <opencv2/videoio/videoio_c.h>
#endif #endif
image get_image_from_stream(CvCapture *cap); #include "image.h"
image get_image_from_stream_cpp(CvCapture *cap);
#include "http_stream.h" #include "http_stream.h"
IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show); IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
@ -34,7 +32,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
float *get_regression_values(char **labels, int n) float *get_regression_values(char **labels, int n)
{ {
float *v = calloc(n, sizeof(float)); float* v = (float*)calloc(n, sizeof(float));
int i; int i;
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
char *p = strchr(labels[i], ' '); char *p = strchr(labels[i], ' ');
@ -52,7 +50,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
char *base = basecfg(cfgfile); char *base = basecfg(cfgfile);
printf("%s\n", base); printf("%s\n", base);
printf("%d\n", ngpus); printf("%d\n", ngpus);
network *nets = calloc(ngpus, sizeof(network)); network* nets = (network*)calloc(ngpus, sizeof(network));
srand(time(0)); srand(time(0));
int seed = rand(); int seed = rand();
@ -431,7 +429,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
float avg_acc = 0; float avg_acc = 0;
float avg_topk = 0; float avg_topk = 0;
int *indexes = calloc(topk, sizeof(int)); int* indexes = (int*)calloc(topk, sizeof(int));
for(i = 0; i < m; ++i){ for(i = 0; i < m; ++i){
int class_id = -1; int class_id = -1;
@ -458,7 +456,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
images[7] = crop_image(im, 0, 0, w, h); images[7] = crop_image(im, 0, 0, w, h);
images[8] = crop_image(im, -shift, shift, w, h); images[8] = crop_image(im, -shift, shift, w, h);
images[9] = crop_image(im, shift, shift, w, h); images[9] = crop_image(im, shift, shift, w, h);
float *pred = calloc(classes, sizeof(float)); float* pred = (float*)calloc(classes, sizeof(float));
for(j = 0; j < 10; ++j){ for(j = 0; j < 10; ++j){
float *p = network_predict(net, images[j].data); float *p = network_predict(net, images[j].data);
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1); if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
@ -504,7 +502,7 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
float avg_acc = 0; float avg_acc = 0;
float avg_topk = 0; float avg_topk = 0;
int *indexes = calloc(topk, sizeof(int)); int* indexes = (int*)calloc(topk, sizeof(int));
int size = net.w; int size = net.w;
for(i = 0; i < m; ++i){ for(i = 0; i < m; ++i){
@ -581,7 +579,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
float avg_acc = 0; float avg_acc = 0;
float avg_topk = 0; float avg_topk = 0;
int *indexes = calloc(topk, sizeof(int)); int* indexes = (int*)calloc(topk, sizeof(int));
for(i = 0; i < m; ++i){ for(i = 0; i < m; ++i){
int class_id = -1; int class_id = -1;
@ -651,7 +649,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
float avg_acc = 0; float avg_acc = 0;
float avg_topk = 0; float avg_topk = 0;
int *indexes = calloc(topk, sizeof(int)); int* indexes = (int*)calloc(topk, sizeof(int));
for(i = 0; i < m; ++i){ for(i = 0; i < m; ++i){
int class_id = -1; int class_id = -1;
@ -662,7 +660,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
break; break;
} }
} }
float *pred = calloc(classes, sizeof(float)); float* pred = (float*)calloc(classes, sizeof(float));
image im = load_image_color(paths[i], 0, 0); image im = load_image_color(paths[i], 0, 0);
for(j = 0; j < nscales; ++j){ for(j = 0; j < nscales; ++j){
image r = resize_min(im, scales[j]); image r = resize_min(im, scales[j]);
@ -707,7 +705,7 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena
int i = 0; int i = 0;
char **names = get_labels(name_list); char **names = get_labels(name_list);
clock_t time; clock_t time;
int *indexes = calloc(top, sizeof(int)); int* indexes = (int*)calloc(top, sizeof(int));
char buff[256]; char buff[256];
char *input = buff; char *input = buff;
while(1){ while(1){
@ -790,7 +788,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
int i = 0; int i = 0;
char **names = get_labels(name_list); char **names = get_labels(name_list);
clock_t time; clock_t time;
int *indexes = calloc(top, sizeof(int)); int* indexes = (int*)calloc(top, sizeof(int));
char buff[256]; char buff[256];
char *input = buff; char *input = buff;
int size = net.w; int size = net.w;
@ -973,7 +971,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
char *name_list = option_find_str(options, "names", 0); char *name_list = option_find_str(options, "names", 0);
char **names = get_labels(name_list); char **names = get_labels(name_list);
int *indexes = calloc(top, sizeof(int)); int* indexes = (int*)calloc(top, sizeof(int));
if(!cap) error("Couldn't connect to webcam.\n"); if(!cap) error("Couldn't connect to webcam.\n");
//cvNamedWindow("Threat", CV_WINDOW_NORMAL); //cvNamedWindow("Threat", CV_WINDOW_NORMAL);
@ -1051,11 +1049,13 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
} }
top_predictions(net, top, indexes); top_predictions(net, top, indexes);
char buff[256]; char buff[256];
sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count); sprintf(buff, "tmp/threat_%06d", count);
//save_image(out, buff); //save_image(out, buff);
#ifndef _WIN32
printf("\033[2J"); printf("\033[2J");
printf("\033[1;1H"); printf("\033[1;1H");
#endif
printf("\nFPS:%.0f\n",fps); printf("\nFPS:%.0f\n",fps);
for(i = 0; i < top; ++i){ for(i = 0; i < top; ++i){
@ -1111,7 +1111,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
char *name_list = option_find_str(options, "names", 0); char *name_list = option_find_str(options, "names", 0);
char **names = get_labels(name_list); char **names = get_labels(name_list);
int *indexes = calloc(top, sizeof(int)); int* indexes = (int*)calloc(top, sizeof(int));
if(!cap) error("Couldn't connect to webcam.\n"); if(!cap) error("Couldn't connect to webcam.\n");
cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL); cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL);
@ -1193,7 +1193,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
char *name_list = option_find_str(options, "names", 0); char *name_list = option_find_str(options, "names", 0);
char **names = get_labels(name_list); char **names = get_labels(name_list);
int *indexes = calloc(top, sizeof(int)); int* indexes = (int*)calloc(top, sizeof(int));
if(!cap) error("Couldn't connect to webcam.\n"); if(!cap) error("Couldn't connect to webcam.\n");
cvNamedWindow("Classifier", CV_WINDOW_NORMAL); cvNamedWindow("Classifier", CV_WINDOW_NORMAL);
@ -1214,8 +1214,10 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1); if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1);
top_predictions(net, top, indexes); top_predictions(net, top, indexes);
#ifndef _WIN32
printf("\033[2J"); printf("\033[2J");
printf("\033[1;1H"); printf("\033[1;1H");
#endif
printf("\nFPS:%.0f\n",fps); printf("\nFPS:%.0f\n",fps);
for(i = 0; i < top; ++i){ for(i = 0; i < top; ++i){
@ -1257,7 +1259,7 @@ void run_classifier(int argc, char **argv)
for(i = 0; i < len; ++i){ for(i = 0; i < len; ++i){
if (gpu_list[i] == ',') ++ngpus; if (gpu_list[i] == ',') ++ngpus;
} }
gpus = calloc(ngpus, sizeof(int)); gpus = (int*)calloc(ngpus, sizeof(int));
for(i = 0; i < ngpus; ++i){ for(i = 0; i < ngpus; ++i){
gpus[i] = atoi(gpu_list); gpus[i] = atoi(gpu_list);
gpu_list = strchr(gpu_list, ',')+1; gpu_list = strchr(gpu_list, ',')+1;

View File

@ -1,2 +1,12 @@
#ifndef CLASSIFIER_H
#define CLASSIFIER_H
#include "list.h"
#ifdef __cplusplus
extern "C" {
#endif
list *read_data_cfg(char *filename); list *read_data_cfg(char *filename);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -9,7 +9,7 @@
#include "demo.h" #include "demo.h"
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#endif #endif
char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};
@ -22,7 +22,7 @@ void train_coco(char *cfgfile, char *weightfile)
//char *train_images = "/home/pjreddie/data/coco/train.txt"; //char *train_images = "/home/pjreddie/data/coco/train.txt";
char *train_images = "data/coco.trainval.txt"; char *train_images = "data/coco.trainval.txt";
//char *train_images = "data/bags.train.list"; //char *train_images = "data/bags.train.list";
char *backup_directory = "/home/pjreddie/backup/"; char* backup_directory = "backup/";
srand(time(0)); srand(time(0));
char *base = basecfg(cfgfile); char *base = basecfg(cfgfile);
printf("%s\n", base); printf("%s\n", base);
@ -164,9 +164,9 @@ void validate_coco(char *cfgfile, char *weightfile)
FILE *fp = fopen(buff, "w"); FILE *fp = fopen(buff, "w");
fprintf(fp, "[\n"); fprintf(fp, "[\n");
box *boxes = calloc(side*side*l.n, sizeof(box)); box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
float **probs = calloc(side*side*l.n, sizeof(float *)); float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
int m = plist->size; int m = plist->size;
int i=0; int i=0;
@ -177,11 +177,11 @@ void validate_coco(char *cfgfile, char *weightfile)
float iou_thresh = .5; float iou_thresh = .5;
int nthreads = 8; int nthreads = 8;
image *val = calloc(nthreads, sizeof(image)); image* val = (image*)calloc(nthreads, sizeof(image));
image *val_resized = calloc(nthreads, sizeof(image)); image* val_resized = (image*)calloc(nthreads, sizeof(image));
image *buf = calloc(nthreads, sizeof(image)); image* buf = (image*)calloc(nthreads, sizeof(image));
image *buf_resized = calloc(nthreads, sizeof(image)); image* buf_resized = (image*)calloc(nthreads, sizeof(image));
pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
load_args args = {0}; load_args args = {0};
args.w = net.w; args.w = net.w;
@ -240,7 +240,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
srand(time(0)); srand(time(0));
char *base = "results/comp4_det_test_"; char *base = "results/comp4_det_test_";
list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); list* plist = get_paths("data/voc/test/2007_test.txt");
char **paths = (char **)list_to_array(plist); char **paths = (char **)list_to_array(plist);
layer l = net.layers[net.n-1]; layer l = net.layers[net.n-1];
@ -248,15 +248,15 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
int side = l.side; int side = l.side;
int j, k; int j, k;
FILE **fps = calloc(classes, sizeof(FILE *)); FILE** fps = (FILE**)calloc(classes, sizeof(FILE*));
for(j = 0; j < classes; ++j){ for(j = 0; j < classes; ++j){
char buff[1024]; char buff[1024];
snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]);
fps[j] = fopen(buff, "w"); fps[j] = fopen(buff, "w");
} }
box *boxes = calloc(side*side*l.n, sizeof(box)); box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
float **probs = calloc(side*side*l.n, sizeof(float *)); float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
int m = plist->size; int m = plist->size;
int i=0; int i=0;
@ -328,9 +328,9 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
char buff[256]; char buff[256];
char *input = buff; char *input = buff;
int j; int j;
box *boxes = calloc(l.side*l.side*l.n, sizeof(box)); box* boxes = (box*)calloc(l.side * l.side * l.n, sizeof(box));
float **probs = calloc(l.side*l.side*l.n, sizeof(float *)); float** probs = (float**)calloc(l.side * l.side * l.n, sizeof(float*));
for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
while(1){ while(1){
if(filename){ if(filename){
strncpy(input, filename, 256); strncpy(input, filename, 256);

View File

@ -1,5 +1,6 @@
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include "col2im.h"
void col2im_add_pixel(float *im, int height, int width, int channels, void col2im_add_pixel(float *im, int height, int width, int channels,
int row, int col, int channel, int pad, float val) int row, int col, int channel, int pad, float val)
{ {

View File

@ -1,6 +1,9 @@
#ifndef COL2IM_H #ifndef COL2IM_H
#define COL2IM_H #define COL2IM_H
#ifdef __cplusplus
extern "C" {
#endif
void col2im_cpu(float* data_col, void col2im_cpu(float* data_col,
int channels, int height, int width, int channels, int height, int width,
int ksize, int stride, int pad, float* data_im); int ksize, int stride, int pad, float* data_im);
@ -10,4 +13,7 @@ void col2im_ongpu(float *data_col,
int channels, int height, int width, int channels, int height, int width,
int ksize, int stride, int pad, float *data_im); int ksize, int stride, int pad, float *data_im);
#endif #endif
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -2,10 +2,8 @@
#include "curand.h" #include "curand.h"
#include "cublas_v2.h" #include "cublas_v2.h"
extern "C" {
#include "col2im.h" #include "col2im.h"
#include "cuda.h" #include "cuda.h"
}
// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE

View File

@ -12,7 +12,7 @@ void train_compare(char *cfgfile, char *weightfile)
srand(time(0)); srand(time(0));
float avg_loss = -1; float avg_loss = -1;
char *base = basecfg(cfgfile); char *base = basecfg(cfgfile);
char *backup_directory = "/home/pjreddie/backup/"; char* backup_directory = "backup/";
printf("%s\n", base); printf("%s\n", base);
network net = parse_network_cfg(cfgfile); network net = parse_network_cfg(cfgfile);
if(weightfile){ if(weightfile){
@ -176,7 +176,7 @@ int bbox_comparator(const void *a, const void *b)
image im1 = load_image_color(box1.filename, net.w, net.h); image im1 = load_image_color(box1.filename, net.w, net.h);
image im2 = load_image_color(box2.filename, net.w, net.h); image im2 = load_image_color(box2.filename, net.w, net.h);
float *X = calloc(net.w*net.h*net.c, sizeof(float)); float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
float *predictions = network_predict(net, X); float *predictions = network_predict(net, X);
@ -205,7 +205,7 @@ void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, in
{ {
image im1 = load_image_color(a->filename, net.w, net.h); image im1 = load_image_color(a->filename, net.w, net.h);
image im2 = load_image_color(b->filename, net.w, net.h); image im2 = load_image_color(b->filename, net.w, net.h);
float *X = calloc(net.w*net.h*net.c, sizeof(float)); float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
float *predictions = network_predict(net, X); float *predictions = network_predict(net, X);
@ -239,7 +239,7 @@ void SortMaster3000(char *filename, char *weightfile)
char **paths = (char **)list_to_array(plist); char **paths = (char **)list_to_array(plist);
int N = plist->size; int N = plist->size;
free_list(plist); free_list(plist);
sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
printf("Sorting %d boxes...\n", N); printf("Sorting %d boxes...\n", N);
for(i = 0; i < N; ++i){ for(i = 0; i < N; ++i){
boxes[i].filename = paths[i]; boxes[i].filename = paths[i];
@ -274,13 +274,13 @@ void BattleRoyaleWithCheese(char *filename, char *weightfile)
int N = plist->size; int N = plist->size;
int total = N; int total = N;
free_list(plist); free_list(plist);
sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
printf("Battling %d boxes...\n", N); printf("Battling %d boxes...\n", N);
for(i = 0; i < N; ++i){ for(i = 0; i < N; ++i){
boxes[i].filename = paths[i]; boxes[i].filename = paths[i];
boxes[i].net = net; boxes[i].net = net;
boxes[i].classes = classes; boxes[i].classes = classes;
boxes[i].elos = calloc(classes, sizeof(float));; boxes[i].elos = (float*)calloc(classes, sizeof(float));
for(j = 0; j < classes; ++j){ for(j = 0; j < classes; ++j){
boxes[i].elos[j] = 1500; boxes[i].elos[j] = 1500;
} }

View File

@ -54,7 +54,7 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
{ {
int total_batch = batch*steps; int total_batch = batch*steps;
int i; int i;
connected_layer l = {0}; connected_layer l = { (LAYER_TYPE)0 };
l.type = CONNECTED; l.type = CONNECTED;
l.inputs = inputs; l.inputs = inputs;
@ -74,14 +74,14 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
l.activation = activation; l.activation = activation;
l.learning_rate_scale = 1; l.learning_rate_scale = 1;
l.output = calloc(total_batch*outputs, sizeof(float)); l.output = (float*)calloc(total_batch * outputs, sizeof(float));
l.delta = calloc(total_batch*outputs, sizeof(float)); l.delta = (float*)calloc(total_batch * outputs, sizeof(float));
l.weight_updates = calloc(inputs*outputs, sizeof(float)); l.weight_updates = (float*)calloc(inputs * outputs, sizeof(float));
l.bias_updates = calloc(outputs, sizeof(float)); l.bias_updates = (float*)calloc(outputs, sizeof(float));
l.weights = calloc(outputs*inputs, sizeof(float)); l.weights = (float*)calloc(outputs * inputs, sizeof(float));
l.biases = calloc(outputs, sizeof(float)); l.biases = (float*)calloc(outputs, sizeof(float));
l.forward = forward_connected_layer; l.forward = forward_connected_layer;
l.backward = backward_connected_layer; l.backward = backward_connected_layer;
@ -98,22 +98,22 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
} }
if(batch_normalize){ if(batch_normalize){
l.scales = calloc(outputs, sizeof(float)); l.scales = (float*)calloc(outputs, sizeof(float));
l.scale_updates = calloc(outputs, sizeof(float)); l.scale_updates = (float*)calloc(outputs, sizeof(float));
for(i = 0; i < outputs; ++i){ for(i = 0; i < outputs; ++i){
l.scales[i] = 1; l.scales[i] = 1;
} }
l.mean = calloc(outputs, sizeof(float)); l.mean = (float*)calloc(outputs, sizeof(float));
l.mean_delta = calloc(outputs, sizeof(float)); l.mean_delta = (float*)calloc(outputs, sizeof(float));
l.variance = calloc(outputs, sizeof(float)); l.variance = (float*)calloc(outputs, sizeof(float));
l.variance_delta = calloc(outputs, sizeof(float)); l.variance_delta = (float*)calloc(outputs, sizeof(float));
l.rolling_mean = calloc(outputs, sizeof(float)); l.rolling_mean = (float*)calloc(outputs, sizeof(float));
l.rolling_variance = calloc(outputs, sizeof(float)); l.rolling_variance = (float*)calloc(outputs, sizeof(float));
l.x = calloc(total_batch*outputs, sizeof(float)); l.x = (float*)calloc(total_batch * outputs, sizeof(float));
l.x_norm = calloc(total_batch*outputs, sizeof(float)); l.x_norm = (float*)calloc(total_batch * outputs, sizeof(float));
} }
#ifdef GPU #ifdef GPU

View File

@ -7,7 +7,11 @@
typedef layer connected_layer; typedef layer connected_layer;
#ifdef __cplusplus
extern "C" {
#endif
connected_layer make_connected_layer(int batch, int steps, int inputs, int outputs, ACTIVATION activation, int batch_normalize); connected_layer make_connected_layer(int batch, int steps, int inputs, int outputs, ACTIVATION activation, int batch_normalize);
size_t get_connected_workspace_size(layer l);
void forward_connected_layer(connected_layer layer, network_state state); void forward_connected_layer(connected_layer layer, network_state state);
void backward_connected_layer(connected_layer layer, network_state state); void backward_connected_layer(connected_layer layer, network_state state);
@ -23,5 +27,8 @@ void push_connected_layer(connected_layer layer);
void pull_connected_layer(connected_layer layer); void pull_connected_layer(connected_layer layer);
#endif #endif
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -3,10 +3,11 @@
#include "cublas_v2.h" #include "cublas_v2.h"
#ifdef CUDNN #ifdef CUDNN
#ifndef USE_CMAKE_LIBS
#pragma comment(lib, "cudnn.lib") #pragma comment(lib, "cudnn.lib")
#endif #endif
#endif
extern "C" {
#include "convolutional_layer.h" #include "convolutional_layer.h"
#include "batchnorm_layer.h" #include "batchnorm_layer.h"
#include "gemm.h" #include "gemm.h"
@ -15,7 +16,7 @@ extern "C" {
#include "col2im.h" #include "col2im.h"
#include "utils.h" #include "utils.h"
#include "cuda.h" #include "cuda.h"
}
__global__ void binarize_kernel(float *x, int n, float *binary) __global__ void binarize_kernel(float *x, int n, float *binary)
{ {
@ -73,7 +74,6 @@ void binarize_weights_gpu(float *weights, int n, int size, float *binary)
CHECK_CUDA(cudaPeekAtLastError()); CHECK_CUDA(cudaPeekAtLastError());
} }
#define WARP_SIZE 32
__global__ void set_zero_kernel(float *src, int size) __global__ void set_zero_kernel(float *src, int size)
{ {
@ -477,10 +477,10 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
simple_copy_ongpu(l.outputs*l.batch / 2, output16, l.x_gpu); simple_copy_ongpu(l.outputs*l.batch / 2, output16, l.x_gpu);
//copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1); //copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1);
//cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream()); //cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream());
float one = 1; float one = 1.0f;
float zero = 0; float zero = 0.0f;
// Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth // Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth
// compared to FP32, it’s just that the statistics and value adjustment should be done in FP32. // compared to FP32, it's just that the statistics and value adjustment should be done in FP32.
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(cudnn_handle(), CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(cudnn_handle(),
CUDNN_BATCHNORM_SPATIAL, CUDNN_BATCHNORM_SPATIAL,
&one, &one,
@ -639,8 +639,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
// l.mean_gpu = l.rolling_mean_gpu; // l.mean_gpu = l.rolling_mean_gpu;
// l.variance_gpu = l.rolling_variance_gpu; // l.variance_gpu = l.rolling_variance_gpu;
//} //}
float one = 1; float one = 1.0f;
float zero = 0; float zero = 0.0f;
CHECK_CUDNN(cudnnBatchNormalizationBackward(cudnn_handle(), CHECK_CUDNN(cudnnBatchNormalizationBackward(cudnn_handle(),
CUDNN_BATCHNORM_SPATIAL, CUDNN_BATCHNORM_SPATIAL,
&one, &one,
@ -936,4 +936,3 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float
} }
} }
*/ */

View File

@ -9,8 +9,10 @@
#include <time.h> #include <time.h>
#ifdef CUDNN #ifdef CUDNN
#ifndef USE_CMAKE_LIBS
#pragma comment(lib, "cudnn.lib") #pragma comment(lib, "cudnn.lib")
#endif #endif
#endif
#ifdef AI2 #ifdef AI2
#include "xnor_layer.h" #include "xnor_layer.h"
@ -288,7 +290,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
l->weightDesc, l->weightDesc,
l->convDesc, l->convDesc,
l->dstTensorDesc, l->dstTensorDesc,
forward_algo, (cudnnConvolutionFwdPreference_t)forward_algo,
0, 0,
&l->fw_algo)); &l->fw_algo));
CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
@ -296,7 +298,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
l->ddstTensorDesc, l->ddstTensorDesc,
l->convDesc, l->convDesc,
l->dsrcTensorDesc, l->dsrcTensorDesc,
backward_algo, (cudnnConvolutionBwdDataPreference_t)backward_algo,
0, 0,
&l->bd_algo)); &l->bd_algo));
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
@ -304,7 +306,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
l->ddstTensorDesc, l->ddstTensorDesc,
l->convDesc, l->convDesc,
l->dweightDesc, l->dweightDesc,
backward_filter, (cudnnConvolutionBwdFilterPreference_t)backward_filter,
0, 0,
&l->bf_algo)); &l->bf_algo));
@ -328,7 +330,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
{ {
int total_batch = batch*steps; int total_batch = batch*steps;
int i; int i;
convolutional_layer l = {0}; convolutional_layer l = { (LAYER_TYPE)0 };
l.type = CONVOLUTIONAL; l.type = CONVOLUTIONAL;
l.index = index; l.index = index;
@ -346,11 +348,11 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
l.batch_normalize = batch_normalize; l.batch_normalize = batch_normalize;
l.learning_rate_scale = 1; l.learning_rate_scale = 1;
l.weights = calloc(c*n*size*size, sizeof(float)); l.weights = (float*)calloc(c * n * size * size, sizeof(float));
l.weight_updates = calloc(c*n*size*size, sizeof(float)); l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
l.biases = calloc(n, sizeof(float)); l.biases = (float*)calloc(n, sizeof(float));
l.bias_updates = calloc(n, sizeof(float)); l.bias_updates = (float*)calloc(n, sizeof(float));
// float scale = 1./sqrt(size*size*c); // float scale = 1./sqrt(size*size*c);
float scale = sqrt(2./(size*size*c)); float scale = sqrt(2./(size*size*c));
@ -364,64 +366,64 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
l.inputs = l.w * l.h * l.c; l.inputs = l.w * l.h * l.c;
l.activation = activation; l.activation = activation;
l.output = calloc(total_batch*l.outputs, sizeof(float)); l.output = (float*)calloc(total_batch*l.outputs, sizeof(float));
l.delta = calloc(total_batch*l.outputs, sizeof(float)); l.delta = (float*)calloc(total_batch*l.outputs, sizeof(float));
l.forward = forward_convolutional_layer; l.forward = forward_convolutional_layer;
l.backward = backward_convolutional_layer; l.backward = backward_convolutional_layer;
l.update = update_convolutional_layer; l.update = update_convolutional_layer;
if(binary){ if(binary){
l.binary_weights = calloc(c*n*size*size, sizeof(float)); l.binary_weights = (float*)calloc(c * n * size * size, sizeof(float));
l.cweights = calloc(c*n*size*size, sizeof(char)); l.cweights = (char*)calloc(c * n * size * size, sizeof(char));
l.scales = calloc(n, sizeof(float)); l.scales = (float*)calloc(n, sizeof(float));
} }
if(xnor){ if(xnor){
l.binary_weights = calloc(c*n*size*size, sizeof(float)); l.binary_weights = (float*)calloc(c * n * size * size, sizeof(float));
l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); l.binary_input = (float*)calloc(l.inputs * l.batch, sizeof(float));
int align = 32;// 8; int align = 32;// 8;
int src_align = l.out_h*l.out_w; int src_align = l.out_h*l.out_w;
l.bit_align = src_align + (align - src_align % align); l.bit_align = src_align + (align - src_align % align);
l.mean_arr = calloc(l.n, sizeof(float)); l.mean_arr = (float*)calloc(l.n, sizeof(float));
const size_t new_c = l.c / 32; const size_t new_c = l.c / 32;
size_t in_re_packed_input_size = new_c * l.w * l.h + 1; size_t in_re_packed_input_size = new_c * l.w * l.h + 1;
l.bin_re_packed_input = calloc(in_re_packed_input_size, sizeof(uint32_t)); l.bin_re_packed_input = (uint32_t*)calloc(in_re_packed_input_size, sizeof(uint32_t));
l.lda_align = 256; // AVX2 l.lda_align = 256; // AVX2
int k = l.size*l.size*l.c; int k = l.size*l.size*l.c;
size_t k_aligned = k + (l.lda_align - k%l.lda_align); size_t k_aligned = k + (l.lda_align - k%l.lda_align);
size_t t_bit_input_size = k_aligned * l.bit_align / 8; size_t t_bit_input_size = k_aligned * l.bit_align / 8;
l.t_bit_input = calloc(t_bit_input_size, sizeof(char)); l.t_bit_input = (char*)calloc(t_bit_input_size, sizeof(char));
} }
if(batch_normalize){ if(batch_normalize){
l.scales = calloc(n, sizeof(float)); l.scales = (float*)calloc(n, sizeof(float));
l.scale_updates = calloc(n, sizeof(float)); l.scale_updates = (float*)calloc(n, sizeof(float));
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
l.scales[i] = 1; l.scales[i] = 1;
} }
l.mean = calloc(n, sizeof(float)); l.mean = (float*)calloc(n, sizeof(float));
l.variance = calloc(n, sizeof(float)); l.variance = (float*)calloc(n, sizeof(float));
l.mean_delta = calloc(n, sizeof(float)); l.mean_delta = (float*)calloc(n, sizeof(float));
l.variance_delta = calloc(n, sizeof(float)); l.variance_delta = (float*)calloc(n, sizeof(float));
l.rolling_mean = calloc(n, sizeof(float)); l.rolling_mean = (float*)calloc(n, sizeof(float));
l.rolling_variance = calloc(n, sizeof(float)); l.rolling_variance = (float*)calloc(n, sizeof(float));
l.x = calloc(total_batch*l.outputs, sizeof(float)); l.x = (float*)calloc(total_batch * l.outputs, sizeof(float));
l.x_norm = calloc(total_batch*l.outputs, sizeof(float)); l.x_norm = (float*)calloc(total_batch * l.outputs, sizeof(float));
} }
if(adam){ if(adam){
l.adam = 1; l.adam = 1;
l.m = calloc(c*n*size*size, sizeof(float)); l.m = (float*)calloc(c * n * size * size, sizeof(float));
l.v = calloc(c*n*size*size, sizeof(float)); l.v = (float*)calloc(c * n * size * size, sizeof(float));
l.bias_m = calloc(n, sizeof(float)); l.bias_m = (float*)calloc(n, sizeof(float));
l.scale_m = calloc(n, sizeof(float)); l.scale_m = (float*)calloc(n, sizeof(float));
l.bias_v = calloc(n, sizeof(float)); l.bias_v = (float*)calloc(n, sizeof(float));
l.scale_v = calloc(n, sizeof(float)); l.scale_v = (float*)calloc(n, sizeof(float));
} }
#ifdef GPU #ifdef GPU
@ -549,11 +551,11 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
l->outputs = l->out_h * l->out_w * l->out_c; l->outputs = l->out_h * l->out_w * l->out_c;
l->inputs = l->w * l->h * l->c; l->inputs = l->w * l->h * l->c;
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
if(l->batch_normalize){ if(l->batch_normalize){
l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); l->x = (float*)realloc(l->x, l->batch * l->outputs * sizeof(float));
l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); l->x_norm = (float*)realloc(l->x_norm, l->batch * l->outputs * sizeof(float));
} }
if (l->xnor) { if (l->xnor) {
@ -642,7 +644,7 @@ void gemm_nn_custom(int M, int N, int K, float ALPHA,
int i, j, k; int i, j, k;
for (i = 0; i < M; ++i) { for (i = 0; i < M; ++i) {
for (k = 0; k < K; ++k) { for (k = 0; k < K; ++k) {
register float A_PART = ALPHA*A[i*lda + k]; float A_PART = ALPHA * A[i * lda + k];
//printf("\n weight = %f \n", A_PART); //printf("\n weight = %f \n", A_PART);
for (j = 0; j < N; ++j) { for (j = 0; j < N; ++j) {
C[i*ldc + j] += A_PART*B[k*ldb + j]; C[i*ldc + j] += A_PART*B[k*ldb + j];
@ -695,8 +697,8 @@ void binary_align_weights(convolutional_layer *l)
size_t align_weights_size = new_lda * m; size_t align_weights_size = new_lda * m;
l->align_bit_weights_size = align_weights_size / 8 + 1; l->align_bit_weights_size = align_weights_size / 8 + 1;
float *align_weights = calloc(align_weights_size, sizeof(float)); float* align_weights = (float*)calloc(align_weights_size, sizeof(float));
l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char)); l->align_bit_weights = (char*)calloc(l->align_bit_weights_size, sizeof(char));
size_t i, j; size_t i, j;
// align A without transpose // align A without transpose
@ -739,7 +741,7 @@ void binary_align_weights(convolutional_layer *l)
//printf("\n l.index = %d \t aw[0] = %f, aw[1] = %f, aw[2] = %f, aw[3] = %f \n", l->index, align_weights[0], align_weights[1], align_weights[2], align_weights[3]); //printf("\n l.index = %d \t aw[0] = %f, aw[1] = %f, aw[2] = %f, aw[3] = %f \n", l->index, align_weights[0], align_weights[1], align_weights[2], align_weights[3]);
//memcpy(l->binary_weights, align_weights, (l->size * l->size * l->c * l->n) * sizeof(float)); //memcpy(l->binary_weights, align_weights, (l->size * l->size * l->c * l->n) * sizeof(float));
float_to_bit(align_weights, l->align_bit_weights, align_weights_size); float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size);
//if (l->n >= 32) //if (l->n >= 32)
if(gpu_index >= 0) if(gpu_index >= 0)
@ -757,7 +759,7 @@ void binary_align_weights(convolutional_layer *l)
//get_mean_array(l->binary_weights, m*new_lda, l->n, l->mean_arr); //get_mean_array(l->binary_weights, m*new_lda, l->n, l->mean_arr);
} }
else { else {
float_to_bit(align_weights, l->align_bit_weights, align_weights_size); float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size);
get_mean_array(l->binary_weights, m*k, l->n, l->mean_arr); get_mean_array(l->binary_weights, m*k, l->n, l->mean_arr);
} }
@ -808,7 +810,7 @@ size_t binary_transpose_align_input(int k, int n, float *b, char **t_bit_input,
// t_bit_input - [new_ldb, n] - [k', n] // t_bit_input - [new_ldb, n] - [k', n]
//transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8); //transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8);
transpose_bin(b, *t_bit_input, k, n, bit_align, new_ldb, 8); transpose_bin((uint32_t*)b, (uint32_t*)*t_bit_input, k, n, bit_align, new_ldb, 8);
return t_intput_size; return t_intput_size;
} }
@ -874,7 +876,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
repack_input(state.input, state.workspace, l.w, l.h, l.c); repack_input(state.input, state.workspace, l.w, l.h, l.c);
// 32 x floats -> 1 x uint32_t // 32 x floats -> 1 x uint32_t
float_to_bit(state.workspace, (char *)l.bin_re_packed_input, l.c * l.w * l.h); float_to_bit(state.workspace, (unsigned char *)l.bin_re_packed_input, l.c * l.w * l.h);
//free(re_packed_input); //free(re_packed_input);
@ -900,10 +902,10 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
// // then exit from if() // // then exit from if()
transpose_uint32((uint32_t *)state.workspace, l.t_bit_input, new_k, n, n, new_ldb); transpose_uint32((uint32_t *)state.workspace, (uint32_t*)l.t_bit_input, new_k, n, n, new_ldb);
// the main GEMM function // the main GEMM function
gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, l.t_bit_input, new_ldb, c, n, l.mean_arr); gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr);
// // alternative GEMM // // alternative GEMM
//gemm_nn_bin_transposed_32bit_packed(m, n, new_k, 1, //gemm_nn_bin_transposed_32bit_packed(m, n, new_k, 1,
@ -945,7 +947,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align); size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align);
// 5x times faster than gemm()-float32 // 5x times faster than gemm()-float32
gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, l.t_bit_input, new_ldb, c, n, l.mean_arr); gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr);
//gemm_nn_custom_bin_mean_transposed(m, n, k, 1, bit_weights, k, t_bit_input, new_ldb, c, n, mean_arr); //gemm_nn_custom_bin_mean_transposed(m, n, k, 1, bit_weights, k, t_bit_input, new_ldb, c, n, mean_arr);
@ -1074,7 +1076,7 @@ void rescale_weights(convolutional_layer l, float scale, float trans)
image *get_weights(convolutional_layer l) image *get_weights(convolutional_layer l)
{ {
image *weights = calloc(l.n, sizeof(image)); image* weights = (image*)calloc(l.n, sizeof(image));
int i; int i;
for(i = 0; i < l.n; ++i){ for(i = 0; i < l.n; ++i){
weights[i] = copy_image(get_convolutional_weight(l, i)); weights[i] = copy_image(get_convolutional_weight(l, i));
@ -1097,4 +1099,3 @@ image *visualize_convolutional_layer(convolutional_layer l, char *window, image
free_image(dc); free_image(dc);
return single_weights; return single_weights;
} }

View File

@ -9,6 +9,9 @@
typedef layer convolutional_layer; typedef layer convolutional_layer;
#ifdef __cplusplus
extern "C" {
#endif
#ifdef GPU #ifdef GPU
void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state); void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state); void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
@ -22,11 +25,11 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
#ifdef CUDNN #ifdef CUDNN
void cudnn_convolutional_setup(layer *l, int cudnn_preference); void cudnn_convolutional_setup(layer *l, int cudnn_preference);
void create_convolutional_cudnn_tensors(layer *l); void create_convolutional_cudnn_tensors(layer *l);
size_t get_convolutional_workspace_size(layer l);
void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
#endif #endif
#endif #endif
size_t get_convolutional_workspace_size(layer l);
convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index); convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index);
void denormalize_convolutional_layer(convolutional_layer l); void denormalize_convolutional_layer(convolutional_layer l);
void resize_convolutional_layer(convolutional_layer *layer, int w, int h); void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
@ -53,5 +56,8 @@ int convolutional_out_width(convolutional_layer layer);
void rescale_weights(convolutional_layer l, float scale, float trans); void rescale_weights(convolutional_layer l, float scale, float trans);
void rgbgr_weights(convolutional_layer l); void rgbgr_weights(convolutional_layer l);
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -32,7 +32,7 @@ char *get_cost_string(COST_TYPE a)
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
{ {
fprintf(stderr, "cost %4d\n", inputs); fprintf(stderr, "cost %4d\n", inputs);
cost_layer l = {0}; cost_layer l = { (LAYER_TYPE)0 };
l.type = COST; l.type = COST;
l.scale = scale; l.scale = scale;
@ -40,9 +40,9 @@ cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float sca
l.inputs = inputs; l.inputs = inputs;
l.outputs = inputs; l.outputs = inputs;
l.cost_type = cost_type; l.cost_type = cost_type;
l.delta = calloc(inputs*batch, sizeof(float)); l.delta = (float*)calloc(inputs * batch, sizeof(float));
l.output = calloc(inputs*batch, sizeof(float)); l.output = (float*)calloc(inputs * batch, sizeof(float));
l.cost = calloc(1, sizeof(float)); l.cost = (float*)calloc(1, sizeof(float));
l.forward = forward_cost_layer; l.forward = forward_cost_layer;
l.backward = backward_cost_layer; l.backward = backward_cost_layer;
@ -60,8 +60,8 @@ void resize_cost_layer(cost_layer *l, int inputs)
{ {
l->inputs = inputs; l->inputs = inputs;
l->outputs = inputs; l->outputs = inputs;
l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); l->delta = (float*)realloc(l->delta, inputs * l->batch * sizeof(float));
l->output = realloc(l->output, inputs*l->batch*sizeof(float)); l->output = (float*)realloc(l->output, inputs * l->batch * sizeof(float));
#ifdef GPU #ifdef GPU
cuda_free(l->delta_gpu); cuda_free(l->delta_gpu);
cuda_free(l->output_gpu); cuda_free(l->output_gpu);

View File

@ -5,6 +5,9 @@
typedef layer cost_layer; typedef layer cost_layer;
#ifdef __cplusplus
extern "C" {
#endif
COST_TYPE get_cost_type(char *s); COST_TYPE get_cost_type(char *s);
char *get_cost_string(COST_TYPE a); char *get_cost_string(COST_TYPE a);
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
@ -17,4 +20,7 @@ void forward_cost_layer_gpu(cost_layer l, network_state state);
void backward_cost_layer_gpu(const cost_layer l, network_state state); void backward_cost_layer_gpu(const cost_layer l, network_state state);
#endif #endif
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -9,7 +9,7 @@ void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
int i,j,k; int i,j,k;
for(i = 0; i < M; ++i){ for(i = 0; i < M; ++i){
for(k = 0; k < K; ++k){ for(k = 0; k < K; ++k){
register float A_PART = ALPHA*A[i*lda+k]; float A_PART = ALPHA * A[i * lda + k];
for(j = 0; j < N; ++j){ for(j = 0; j < N; ++j){
C[i*ldc+j] += A_PART*B[k*ldb+j]; C[i*ldc+j] += A_PART*B[k*ldb+j];
} }
@ -26,7 +26,7 @@ void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
int i,j,k; int i,j,k;
for(i = 0; i < M; ++i){ for(i = 0; i < M; ++i){
for(j = 0; j < N; ++j){ for(j = 0; j < N; ++j){
register float sum = 0; float sum = 0;
for(k = 0; k < K; ++k){ for(k = 0; k < K; ++k){
sum += ALPHA*A[i*lda+k]*B[k+j*ldb]; sum += ALPHA*A[i*lda+k]*B[k+j*ldb];
} }
@ -44,7 +44,7 @@ void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
int i,j,k; int i,j,k;
for(i = 0; i < M; ++i){ for(i = 0; i < M; ++i){
for(k = 0; k < K; ++k){ for(k = 0; k < K; ++k){
register float A_PART = ALPHA*A[k*lda+i]; float A_PART = ALPHA * A[k * lda + i];
for(j = 0; j < N; ++j){ for(j = 0; j < N; ++j){
C[i*ldc+j] += A_PART*B[k*ldb+j]; C[i*ldc+j] += A_PART*B[k*ldb+j];
} }

View File

@ -30,7 +30,7 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
{ {
fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
batch = batch / steps; batch = batch / steps;
layer l = {0}; layer l = { (LAYER_TYPE)0 };
l.batch = batch; l.batch = batch;
l.type = CRNN; l.type = CRNN;
l.steps = steps; l.steps = steps;
@ -44,22 +44,19 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
l.hidden = h * w * hidden_filters; l.hidden = h * w * hidden_filters;
l.outputs = l.out_h * l.out_w * l.out_c; l.outputs = l.out_h * l.out_w * l.out_c;
l.state = calloc(l.hidden*batch*(steps+1), sizeof(float)); l.state = (float*)calloc(l.hidden * batch * (steps + 1), sizeof(float));
l.input_layer = malloc(sizeof(layer)); l.input_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "");
*(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0); *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
l.input_layer->batch = batch; l.input_layer->batch = batch;
if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
l.self_layer = malloc(sizeof(layer)); l.self_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "");
*(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0); *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
l.self_layer->batch = batch; l.self_layer->batch = batch;
if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size; if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
l.output_layer = malloc(sizeof(layer)); l.output_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "");
*(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0); *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
l.output_layer->batch = batch; l.output_layer->batch = batch;
if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;

View File

@ -6,6 +6,9 @@
#include "layer.h" #include "layer.h"
#include "network.h" #include "network.h"
#ifdef __cplusplus
extern "C" {
#endif
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize); layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
void forward_crnn_layer(layer l, network_state state); void forward_crnn_layer(layer l, network_state state);
@ -20,5 +23,8 @@ void push_crnn_layer(layer l);
void pull_crnn_layer(layer l); void pull_crnn_layer(layer l);
#endif #endif
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -16,7 +16,7 @@ void backward_crop_layer_gpu(const crop_layer l, network_state state){}
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
{ {
fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
crop_layer l = {0}; crop_layer l = { (LAYER_TYPE)0 };
l.type = CROP; l.type = CROP;
l.batch = batch; l.batch = batch;
l.h = h; l.h = h;
@ -32,7 +32,7 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int
l.out_c = c; l.out_c = c;
l.inputs = l.w * l.h * l.c; l.inputs = l.w * l.h * l.c;
l.outputs = l.out_w * l.out_h * l.out_c; l.outputs = l.out_w * l.out_h * l.out_c;
l.output = calloc(l.outputs*batch, sizeof(float)); l.output = (float*)calloc(l.outputs * batch, sizeof(float));
l.forward = forward_crop_layer; l.forward = forward_crop_layer;
l.backward = backward_crop_layer; l.backward = backward_crop_layer;
@ -56,7 +56,7 @@ void resize_crop_layer(layer *l, int w, int h)
l->inputs = l->w * l->h * l->c; l->inputs = l->w * l->h * l->c;
l->outputs = l->out_h * l->out_w * l->out_c; l->outputs = l->out_h * l->out_w * l->out_c;
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
#ifdef GPU #ifdef GPU
cuda_free(l->output_gpu); cuda_free(l->output_gpu);
l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);

View File

@ -7,6 +7,9 @@
typedef layer crop_layer; typedef layer crop_layer;
#ifdef __cplusplus
extern "C" {
#endif
image get_crop_image(crop_layer l); image get_crop_image(crop_layer l);
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
void forward_crop_layer(const crop_layer l, network_state state); void forward_crop_layer(const crop_layer l, network_state state);
@ -16,5 +19,8 @@ void resize_crop_layer(layer *l, int w, int h);
void forward_crop_layer_gpu(crop_layer l, network_state state); void forward_crop_layer_gpu(crop_layer l, network_state state);
#endif #endif
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -2,12 +2,10 @@
#include "curand.h" #include "curand.h"
#include "cublas_v2.h" #include "cublas_v2.h"
extern "C" {
#include "crop_layer.h" #include "crop_layer.h"
#include "utils.h" #include "utils.h"
#include "cuda.h" #include "cuda.h"
#include "image.h" #include "image.h"
}
__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) __device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
{ {

View File

@ -1,4 +1,10 @@
#ifdef __cplusplus
extern "C" {
#endif
int gpu_index = 0; int gpu_index = 0;
#ifdef __cplusplus
}
#endif // __cplusplus
#ifdef GPU #ifdef GPU
@ -71,7 +77,7 @@ dim3 cuda_gridsize(size_t n){
x = ceil(sqrt(k)); x = ceil(sqrt(k));
y = (n-1)/(x*BLOCK) + 1; y = (n-1)/(x*BLOCK) + 1;
} }
dim3 d = {x, y, 1}; dim3 d = { (unsigned int)x, (unsigned int)y, 1 };
//printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
return d; return d;
} }
@ -188,7 +194,7 @@ cublasHandle_t blas_handle()
if(!init[i]) { if(!init[i]) {
cublasCreate(&handle[i]); cublasCreate(&handle[i]);
cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream()); cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream());
CHECK_CUDA(status); CHECK_CUDA((cudaError_t)status);
init[i] = 1; init[i] = 1;
} }
return handle[i]; return handle[i];
@ -226,7 +232,7 @@ void cuda_random(float *x_gpu, size_t n)
float cuda_compare(float *x_gpu, float *x, size_t n, char *s) float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
{ {
float *tmp = calloc(n, sizeof(float)); float* tmp = (float*)calloc(n, sizeof(float));
cuda_pull_array(x_gpu, tmp, n); cuda_pull_array(x_gpu, tmp, n);
//int i; //int i;
//for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
@ -310,6 +316,6 @@ int get_gpu_compute_capability(int i)
} }
#else // GPU #else // GPU
#include "cuda.h" #include "darknet.h"
void cuda_set_device(int n) {} void cuda_set_device(int n) {}
#endif // GPU #endif // GPU

View File

@ -1,25 +1,27 @@
#ifndef CUDA_H #ifndef DARKCUDA_H
#define CUDA_H #define DARKCUDA_H
#include "darknet.h" #include "darknet.h"
#if defined(_MSC_VER) && _MSC_VER < 1900 #ifdef __cplusplus
#define inline __inline extern "C" {
#endif #endif
extern int gpu_index; extern int gpu_index;
#ifdef __cplusplus
}
#endif // __cplusplus
#ifdef GPU #ifdef GPU
#define BLOCK 512
#include "cuda_runtime.h" #include <cuda_runtime.h>
#include "curand.h" #include <curand.h>
#include "cublas_v2.h" #include <cublas_v2.h>
#include "cuda_runtime_api.h" #include <cuda_runtime_api.h>
//#include "driver_types.h" #include <driver_types.h>
#ifdef CUDNN #ifdef CUDNN
#include "cudnn.h" #include <cudnn.h>
#endif // CUDNN #endif // CUDNN
#ifndef __DATE__ #ifndef __DATE__
@ -65,9 +67,6 @@ extern "C" {
cudaStream_t get_cuda_memcpy_stream(); cudaStream_t get_cuda_memcpy_stream();
int get_number_of_blocks(int array_size, int block_size); int get_number_of_blocks(int array_size, int block_size);
int get_gpu_compute_capability(int i); int get_gpu_compute_capability(int i);
#ifdef __cplusplus
}
#endif // __cplusplus
#ifdef CUDNN #ifdef CUDNN
cudnnHandle_t cudnn_handle(); cudnnHandle_t cudnn_handle();
@ -77,6 +76,10 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line
#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );
#endif #endif
#ifdef __cplusplus
}
#endif // __cplusplus
#else // GPU #else // GPU
//LIB_API void cuda_set_device(int n); //LIB_API void cuda_set_device(int n);
#endif // GPU #endif // GPU

View File

@ -10,7 +10,7 @@
#include "connected_layer.h" #include "connected_layer.h"
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#endif #endif
extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
@ -258,12 +258,12 @@ layer normalize_layer(layer l, int n)
{ {
int j; int j;
l.batch_normalize=1; l.batch_normalize=1;
l.scales = calloc(n, sizeof(float)); l.scales = (float*)calloc(n, sizeof(float));
for(j = 0; j < n; ++j){ for(j = 0; j < n; ++j){
l.scales[j] = 1; l.scales[j] = 1;
} }
l.rolling_mean = calloc(n, sizeof(float)); l.rolling_mean = (float*)calloc(n, sizeof(float));
l.rolling_variance = calloc(n, sizeof(float)); l.rolling_variance = (float*)calloc(n, sizeof(float));
return l; return l;
} }

View File

@ -1,3 +1,4 @@
#ifdef _WIN32
#ifndef _UNISTD_H #ifndef _UNISTD_H
#define _UNISTD_H 1 #define _UNISTD_H 1
@ -6,12 +7,13 @@
* Please add functionality as needed * Please add functionality as needed
*/ */
#include <stdlib.h> #include <Winsock2.h>
#include <direct.h> /* for _getcwd() and _chdir() */
#include <getopt.h>
#include <io.h> #include <io.h>
#include <process.h> /* for getpid() and the exec..() family */ #include <process.h> /* for getpid() and the exec..() family */
#include <direct.h> /* for _getcwd() and _chdir() */ #include <stdlib.h>
#include "getopt.h" /* getopt at: https://gist.github.com/ashelly/7776712 */
#define srandom srand #define srandom srand
#define random rand #define random rand
@ -19,7 +21,7 @@
These may be OR'd together. */ These may be OR'd together. */
#define R_OK 4 /* Test for read permission. */ #define R_OK 4 /* Test for read permission. */
#define W_OK 2 /* Test for write permission. */ #define W_OK 2 /* Test for write permission. */
//#define X_OK 1 /* execute permission - unsupported in windows*/ #define X_OK R_OK /* execute permission - unsupported in Windows, \
#define F_OK 0 /* Test for existence. */ #define F_OK 0 /* Test for existence. */
#define access _access #define access _access
@ -48,5 +50,7 @@ These may be OR'd together. */
//typedef unsigned __int16 uint16_t; //typedef unsigned __int16 uint16_t;
//typedef unsigned __int32 uint32_t; //typedef unsigned __int32 uint32_t;
//typedef unsigned __int64 uint64_t; //typedef unsigned __int64 uint64_t;
#endif /* _UNISTD_H */
#endif /* unistd.h */ #else
#include <unistd.h>
#endif /* _WIN32 */

View File

@ -41,7 +41,7 @@ char **get_random_paths_indexes(char **paths, int n, int m, int *indexes)
char **get_random_paths(char **paths, int n, int m) char **get_random_paths(char **paths, int n, int m)
{ {
char **random_paths = calloc(n, sizeof(char*)); char** random_paths = (char**)calloc(n, sizeof(char*));
int i; int i;
pthread_mutex_lock(&mutex); pthread_mutex_lock(&mutex);
//printf("n = %d \n", n); //printf("n = %d \n", n);
@ -60,7 +60,7 @@ char **get_random_paths(char **paths, int n, int m)
char **find_replace_paths(char **paths, int n, char *find, char *replace) char **find_replace_paths(char **paths, int n, char *find, char *replace)
{ {
char **replace_paths = calloc(n, sizeof(char*)); char** replace_paths = (char**)calloc(n, sizeof(char*));
int i; int i;
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
char replaced[4096]; char replaced[4096];
@ -75,7 +75,7 @@ matrix load_image_paths_gray(char **paths, int n, int w, int h)
int i; int i;
matrix X; matrix X;
X.rows = n; X.rows = n;
X.vals = calloc(X.rows, sizeof(float*)); X.vals = (float**)calloc(X.rows, sizeof(float*));
X.cols = 0; X.cols = 0;
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
@ -96,7 +96,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
int i; int i;
matrix X; matrix X;
X.rows = n; X.rows = n;
X.vals = calloc(X.rows, sizeof(float*)); X.vals = (float**)calloc(X.rows, sizeof(float*));
X.cols = 0; X.cols = 0;
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
@ -112,7 +112,7 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int
int i; int i;
matrix X; matrix X;
X.rows = n; X.rows = n;
X.vals = calloc(X.rows, sizeof(float*)); X.vals = (float**)calloc(X.rows, sizeof(float*));
X.cols = 0; X.cols = 0;
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
@ -139,7 +139,7 @@ extern int check_mistakes;
box_label *read_boxes(char *filename, int *n) box_label *read_boxes(char *filename, int *n)
{ {
box_label *boxes = calloc(1, sizeof(box_label)); box_label* boxes = (box_label*)calloc(1, sizeof(box_label));
FILE *file = fopen(filename, "r"); FILE *file = fopen(filename, "r");
if (!file) { if (!file) {
printf("Can't open label file. (This can be normal only if you use MSCOCO): %s \n", filename); printf("Can't open label file. (This can be normal only if you use MSCOCO): %s \n", filename);
@ -158,7 +158,7 @@ box_label *read_boxes(char *filename, int *n)
int id; int id;
int count = 0; int count = 0;
while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
boxes = realloc(boxes, (count+1)*sizeof(box_label)); boxes = (box_label*)realloc(boxes, (count + 1) * sizeof(box_label));
boxes[count].id = id; boxes[count].id = id;
boxes[count].x = x; boxes[count].x = x;
boxes[count].y = y; boxes[count].y = y;
@ -300,7 +300,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
free(boxes); free(boxes);
} }
void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy, void fill_truth_detection(const char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
int small_object, int net_w, int net_h) int small_object, int net_w, int net_h)
{ {
char labelpath[4096]; char labelpath[4096];
@ -391,7 +391,6 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
free(boxes); free(boxes);
} }
#define NUMCHARS 37
void print_letters(float *pred, int n) void print_letters(float *pred, int n)
{ {
@ -565,7 +564,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
d.shallow = 0; d.shallow = 0;
d.X.rows = n; d.X.rows = n;
d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
d.X.cols = h*w*3; d.X.cols = h*w*3;
@ -619,7 +618,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
d.shallow = 0; d.shallow = 0;
d.X.rows = n; d.X.rows = n;
d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
d.X.cols = h*w*6; d.X.cols = h*w*6;
int k = 2*(classes); int k = 2*(classes);
@ -628,7 +627,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
image im1 = load_image_color(paths[i*2], w, h); image im1 = load_image_color(paths[i*2], w, h);
image im2 = load_image_color(paths[i*2+1], w, h); image im2 = load_image_color(paths[i*2+1], w, h);
d.X.vals[i] = calloc(d.X.cols, sizeof(float)); d.X.vals[i] = (float*)calloc(d.X.cols, sizeof(float));
memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float));
memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float));
@ -690,7 +689,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
d.h = h; d.h = h;
d.X.rows = 1; d.X.rows = 1;
d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
d.X.cols = h*w*3; d.X.cols = h*w*3;
int k = (4+classes)*30; int k = (4+classes)*30;
@ -729,12 +728,12 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
} }
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#include "opencv2/imgproc/imgproc_c.h" #include <opencv2/imgproc/imgproc_c.h>
#include "opencv2/core/version.hpp" #include <opencv2/core/version.hpp>
#ifndef CV_VERSION_EPOCH #ifndef CV_VERSION_EPOCH
#include "opencv2/videoio/videoio_c.h" #include <opencv2/videoio/videoio_c.h>
#include "opencv2/imgcodecs/imgcodecs_c.h" #include <opencv2/imgcodecs/imgcodecs_c.h>
#endif #endif
#include "http_stream.h" #include "http_stream.h"
@ -748,7 +747,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
d.shallow = 0; d.shallow = 0;
d.X.rows = n; d.X.rows = n;
d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
d.X.cols = h*w*c; d.X.cols = h*w*c;
d.y = make_matrix(n, 5*boxes); d.y = make_matrix(n, 5*boxes);
@ -817,7 +816,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
d.shallow = 0; d.shallow = 0;
d.X.rows = n; d.X.rows = n;
d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
d.X.cols = h*w*c; d.X.cols = h*w*c;
d.y = make_matrix(n, 5 * boxes); d.y = make_matrix(n, 5 * boxes);
@ -903,7 +902,7 @@ void *load_thread(void *ptr)
pthread_t load_data_in_thread(load_args args) pthread_t load_data_in_thread(load_args args)
{ {
pthread_t thread; pthread_t thread;
struct load_args *ptr = calloc(1, sizeof(struct load_args)); struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
*ptr = args; *ptr = args;
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed"); if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed");
return thread; return thread;
@ -918,8 +917,8 @@ void *load_threads(void *ptr)
data *out = args.d; data *out = args.d;
int total = args.n; int total = args.n;
free(ptr); free(ptr);
data *buffers = calloc(args.threads, sizeof(data)); data* buffers = (data*)calloc(args.threads, sizeof(data));
pthread_t *threads = calloc(args.threads, sizeof(pthread_t)); pthread_t* threads = (pthread_t*)calloc(args.threads, sizeof(pthread_t));
for(i = 0; i < args.threads; ++i){ for(i = 0; i < args.threads; ++i){
args.d = buffers + i; args.d = buffers + i;
args.n = (i+1) * total/args.threads - i * total/args.threads; args.n = (i+1) * total/args.threads - i * total/args.threads;
@ -942,7 +941,7 @@ void *load_threads(void *ptr)
pthread_t load_data(load_args args) pthread_t load_data(load_args args)
{ {
pthread_t thread; pthread_t thread;
struct load_args *ptr = calloc(1, sizeof(struct load_args)); struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
*ptr = args; *ptr = args;
if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed"); if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed");
return thread; return thread;
@ -996,11 +995,11 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
int i; int i;
d.X.rows = n; d.X.rows = n;
d.X.vals = calloc(n, sizeof(float*)); d.X.vals = (float**)calloc(n, sizeof(float*));
d.X.cols = w*h*3; d.X.cols = w*h*3;
d.y.rows = n; d.y.rows = n;
d.y.vals = calloc(n, sizeof(float*)); d.y.vals = (float**)calloc(n, sizeof(float*));
d.y.cols = w*scale * h*scale * 3; d.y.cols = w*scale * h*scale * 3;
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
@ -1048,7 +1047,7 @@ matrix concat_matrix(matrix m1, matrix m2)
matrix m; matrix m;
m.cols = m1.cols; m.cols = m1.cols;
m.rows = m1.rows+m2.rows; m.rows = m1.rows+m2.rows;
m.vals = calloc(m1.rows + m2.rows, sizeof(float*)); m.vals = (float**)calloc(m1.rows + m2.rows, sizeof(float*));
for(i = 0; i < m1.rows; ++i){ for(i = 0; i < m1.rows; ++i){
m.vals[count++] = m1.vals[i]; m.vals[count++] = m1.vals[i];
} }
@ -1072,9 +1071,9 @@ data concat_datas(data *d, int n)
int i; int i;
data out = {0}; data out = {0};
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
data new = concat_data(d[i], out); data newdata = concat_data(d[i], out);
free_data(out); free_data(out);
out = new; out = newdata;
} }
return out; return out;
} }
@ -1300,8 +1299,8 @@ data get_random_data(data d, int num)
r.X.cols = d.X.cols; r.X.cols = d.X.cols;
r.y.cols = d.y.cols; r.y.cols = d.y.cols;
r.X.vals = calloc(num, sizeof(float *)); r.X.vals = (float**)calloc(num, sizeof(float*));
r.y.vals = calloc(num, sizeof(float *)); r.y.vals = (float**)calloc(num, sizeof(float*));
int i; int i;
for(i = 0; i < num; ++i){ for(i = 0; i < num; ++i){
@ -1314,7 +1313,7 @@ data get_random_data(data d, int num)
data *split_data(data d, int part, int total) data *split_data(data d, int part, int total)
{ {
data *split = calloc(2, sizeof(data)); data* split = (data*)calloc(2, sizeof(data));
int i; int i;
int start = part*d.X.rows/total; int start = part*d.X.rows/total;
int end = (part+1)*d.X.rows/total; int end = (part+1)*d.X.rows/total;
@ -1327,10 +1326,10 @@ data *split_data(data d, int part, int total)
train.X.cols = test.X.cols = d.X.cols; train.X.cols = test.X.cols = d.X.cols;
train.y.cols = test.y.cols = d.y.cols; train.y.cols = test.y.cols = d.y.cols;
train.X.vals = calloc(train.X.rows, sizeof(float*)); train.X.vals = (float**)calloc(train.X.rows, sizeof(float*));
test.X.vals = calloc(test.X.rows, sizeof(float*)); test.X.vals = (float**)calloc(test.X.rows, sizeof(float*));
train.y.vals = calloc(train.y.rows, sizeof(float*)); train.y.vals = (float**)calloc(train.y.rows, sizeof(float*));
test.y.vals = calloc(test.y.rows, sizeof(float*)); test.y.vals = (float**)calloc(test.y.rows, sizeof(float*));
for(i = 0; i < start; ++i){ for(i = 0; i < start; ++i){
train.X.vals[i] = d.X.vals[i]; train.X.vals[i] = d.X.vals[i];

View File

@ -2,14 +2,14 @@
#define DATA_H #define DATA_H
#include <pthread.h> #include <pthread.h>
#if defined(_MSC_VER) && _MSC_VER < 1900 #include "darknet.h"
#define inline __inline
#endif
#include "darknet.h" #include "darknet.h"
#include "matrix.h" #include "matrix.h"
#include "list.h" #include "list.h"
#include "image.h" #include "image.h"
#ifdef __cplusplus
extern "C" {
#endif
#include "tree.h" #include "tree.h"
static inline float distance_from_edge(int x, int max) static inline float distance_from_edge(int x, int max)
@ -115,5 +115,8 @@ data *split_data(data d, int part, int total);
data concat_data(data d1, data d2); data concat_data(data d1, data d2);
data concat_datas(data *d, int n); data concat_datas(data *d, int n);
void fill_truth(char *path, char **labels, int k, float *truth); void fill_truth(char *path, char **labels, int k, float *truth);
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -2,7 +2,6 @@
#include "curand.h" #include "curand.h"
#include "cublas_v2.h" #include "cublas_v2.h"
extern "C" {
#include "convolutional_layer.h" #include "convolutional_layer.h"
#include "deconvolutional_layer.h" #include "deconvolutional_layer.h"
#include "gemm.h" #include "gemm.h"
@ -11,7 +10,6 @@ extern "C" {
#include "col2im.h" #include "col2im.h"
#include "utils.h" #include "utils.h"
#include "cuda.h" #include "cuda.h"
}
extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
{ {
@ -95,7 +93,7 @@ extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
} }
extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay) extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay)
{ {
int size = layer.size*layer.size*layer.c*layer.n; int size = layer.size*layer.size*layer.c*layer.n;

View File

@ -46,7 +46,7 @@ image get_deconvolutional_delta(deconvolutional_layer l)
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation) deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
{ {
int i; int i;
deconvolutional_layer l = {0}; deconvolutional_layer l = { (LAYER_TYPE)0 };
l.type = DECONVOLUTIONAL; l.type = DECONVOLUTIONAL;
l.h = h; l.h = h;
@ -57,11 +57,11 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
l.stride = stride; l.stride = stride;
l.size = size; l.size = size;
l.weights = calloc(c*n*size*size, sizeof(float)); l.weights = (float*)calloc(c * n * size * size, sizeof(float));
l.weight_updates = calloc(c*n*size*size, sizeof(float)); l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
l.biases = calloc(n, sizeof(float)); l.biases = (float*)calloc(n, sizeof(float));
l.bias_updates = calloc(n, sizeof(float)); l.bias_updates = (float*)calloc(n, sizeof(float));
float scale = 1./sqrt(size*size*c); float scale = 1./sqrt(size*size*c);
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
@ -76,9 +76,9 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
l.outputs = l.out_w * l.out_h * l.out_c; l.outputs = l.out_w * l.out_h * l.out_c;
l.inputs = l.w * l.h * l.c; l.inputs = l.w * l.h * l.c;
l.col_image = calloc(h*w*size*size*n, sizeof(float)); l.col_image = (float*)calloc(h * w * size * size * n, sizeof(float));
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
l.forward = forward_deconvolutional_layer; l.forward = forward_deconvolutional_layer;
l.backward = backward_deconvolutional_layer; l.backward = backward_deconvolutional_layer;
@ -110,11 +110,11 @@ void resize_deconvolutional_layer(deconvolutional_layer *l, int h, int w)
int out_h = deconvolutional_out_height(*l); int out_h = deconvolutional_out_height(*l);
int out_w = deconvolutional_out_width(*l); int out_w = deconvolutional_out_width(*l);
l->col_image = realloc(l->col_image, l->col_image = (float*)realloc(l->col_image,
out_h*out_w*l->size*l->size*l->c*sizeof(float)); out_h*out_w*l->size*l->size*l->c*sizeof(float));
l->output = realloc(l->output, l->output = (float*)realloc(l->output,
l->batch*out_h * out_w * l->n*sizeof(float)); l->batch*out_h * out_w * l->n*sizeof(float));
l->delta = realloc(l->delta, l->delta = (float*)realloc(l->delta,
l->batch*out_h * out_w * l->n*sizeof(float)); l->batch*out_h * out_w * l->n*sizeof(float));
#ifdef GPU #ifdef GPU
cuda_free(l->col_image_gpu); cuda_free(l->col_image_gpu);
@ -191,7 +191,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
} }
} }
void update_deconvolutional_layer(deconvolutional_layer l, float learning_rate, float momentum, float decay) void update_deconvolutional_layer(deconvolutional_layer l, int skip, float learning_rate, float momentum, float decay)
{ {
int size = l.size*l.size*l.c*l.n; int size = l.size*l.size*l.c*l.n;
axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1); axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1);

View File

@ -9,10 +9,13 @@
typedef layer deconvolutional_layer; typedef layer deconvolutional_layer;
#ifdef __cplusplus
extern "C" {
#endif
#ifdef GPU #ifdef GPU
void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay); void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay);
void push_deconvolutional_layer(deconvolutional_layer layer); void push_deconvolutional_layer(deconvolutional_layer layer);
void pull_deconvolutional_layer(deconvolutional_layer layer); void pull_deconvolutional_layer(deconvolutional_layer layer);
#endif #endif
@ -20,7 +23,7 @@ void pull_deconvolutional_layer(deconvolutional_layer layer);
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation); deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation);
void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w); void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state); void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state);
void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay); void update_deconvolutional_layer(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay);
void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state); void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state);
image get_deconvolutional_image(deconvolutional_layer layer); image get_deconvolutional_image(deconvolutional_layer layer);
@ -30,5 +33,8 @@ image get_deconvolutional_filter(deconvolutional_layer layer, int i);
int deconvolutional_out_height(deconvolutional_layer layer); int deconvolutional_out_height(deconvolutional_layer layer);
int deconvolutional_out_width(deconvolutional_layer layer); int deconvolutional_out_width(deconvolutional_layer layer);
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -9,20 +9,18 @@
#include "demo.h" #include "demo.h"
#ifdef WIN32 #ifdef WIN32
#include <time.h> #include <time.h>
#include <winsock.h>
#include "gettimeofday.h" #include "gettimeofday.h"
#else #else
#include <sys/time.h> #include <sys/time.h>
#endif #endif
#define FRAMES 3
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#include "opencv2/imgproc/imgproc_c.h" #include <opencv2/imgproc/imgproc_c.h>
#include "opencv2/core/version.hpp" #include <opencv2/core/version.hpp>
#ifndef CV_VERSION_EPOCH #ifndef CV_VERSION_EPOCH
#include "opencv2/videoio/videoio_c.h" #include <opencv2/videoio/videoio_c.h>
#endif #endif
#include "http_stream.h" #include "http_stream.h"
image get_image_from_stream(CvCapture *cap); image get_image_from_stream(CvCapture *cap);
@ -45,10 +43,10 @@ static int demo_ext_output = 0;
static long long int frame_id = 0; static long long int frame_id = 0;
static int demo_json_port = -1; static int demo_json_port = -1;
static float *predictions[FRAMES]; static float* predictions[NFRAMES];
static int demo_index = 0; static int demo_index = 0;
static image images[FRAMES]; static image images[NFRAMES];
static IplImage* ipl_images[FRAMES]; static IplImage* ipl_images[NFRAMES];
static float *avg; static float *avg;
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output); void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output);
@ -77,7 +75,7 @@ void *fetch_in_thread(void *ptr)
//error("Stream closed."); //error("Stream closed.");
printf("Stream closed.\n"); printf("Stream closed.\n");
flag_exit = 1; flag_exit = 1;
return EXIT_FAILURE; exit(EXIT_FAILURE);
} }
//in_s = resize_image(in, net.w, net.h); //in_s = resize_image(in, net.w, net.h);
@ -91,14 +89,14 @@ void *detect_in_thread(void *ptr)
float *prediction = network_predict(net, X); float *prediction = network_predict(net, X);
memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float)); memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
mean_arrays(predictions, FRAMES, l.outputs, avg); mean_arrays(predictions, NFRAMES, l.outputs, avg);
l.output = avg; l.output = avg;
free_image(det_s); free_image(det_s);
ipl_images[demo_index] = det_img; ipl_images[demo_index] = det_img;
det_img = ipl_images[(demo_index + FRAMES / 2 + 1) % FRAMES]; det_img = ipl_images[(demo_index + NFRAMES / 2 + 1) % NFRAMES];
demo_index = (demo_index + 1) % FRAMES; demo_index = (demo_index + 1) % NFRAMES;
if (letter_box) if (letter_box)
dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box
@ -110,11 +108,11 @@ void *detect_in_thread(void *ptr)
double get_wall_time() double get_wall_time()
{ {
struct timeval time; struct timeval walltime;
if (gettimeofday(&time,NULL)){ if (gettimeofday(&walltime, NULL)) {
return 0; return 0;
} }
return (double)time.tv_sec + (double)time.tv_usec * .000001; return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001;
} }
void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
@ -161,8 +159,8 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
int j; int j;
avg = (float *) calloc(l.outputs, sizeof(float)); avg = (float *) calloc(l.outputs, sizeof(float));
for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); for(j = 0; j < NFRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3); for(j = 0; j < NFRAMES; ++j) images[j] = make_image(1,1,3);
if (l.classes != demo_classes) { if (l.classes != demo_classes) {
printf("Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes); printf("Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes);
@ -185,7 +183,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
det_img = in_img; det_img = in_img;
det_s = in_s; det_s = in_s;
for(j = 0; j < FRAMES/2; ++j){ for (j = 0; j < NFRAMES / 2; ++j) {
fetch_in_thread(0); fetch_in_thread(0);
detect_in_thread(0); detect_in_thread(0);
det_img = in_img; det_img = in_img;
@ -318,10 +316,10 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
free_image(in_s); free_image(in_s);
free(avg); free(avg);
for (j = 0; j < FRAMES; ++j) free(predictions[j]); for (j = 0; j < NFRAMES; ++j) free(predictions[j]);
for (j = 0; j < FRAMES; ++j) free_image(images[j]); for (j = 0; j < NFRAMES; ++j) free_image(images[j]);
free_ptrs(names, net.layers[net.n - 1].classes); free_ptrs((void **)names, net.layers[net.n - 1].classes);
int i; int i;
const int nsize = 8; const int nsize = 8;
@ -342,4 +340,3 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
} }
#endif #endif

View File

@ -1,8 +1,14 @@
#ifndef DEMO #ifndef DEMO_H
#define DEMO #define DEMO_H
#include "image.h" #include "image.h"
#ifdef __cplusplus
extern "C" {
#endif
void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int json_port, int dont_show, int ext_output); int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int json_port, int dont_show, int ext_output);
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -12,7 +12,7 @@
detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore)
{ {
detection_layer l = {0}; detection_layer l = { (LAYER_TYPE)0 };
l.type = DETECTION; l.type = DETECTION;
l.n = n; l.n = n;
@ -25,11 +25,11 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
l.w = side; l.w = side;
l.h = side; l.h = side;
assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
l.cost = calloc(1, sizeof(float)); l.cost = (float*)calloc(1, sizeof(float));
l.outputs = l.inputs; l.outputs = l.inputs;
l.truths = l.side*l.side*(1+l.coords+l.classes); l.truths = l.side*l.side*(1+l.coords+l.classes);
l.output = calloc(batch*l.outputs, sizeof(float)); l.output = (float*)calloc(batch * l.outputs, sizeof(float));
l.delta = calloc(batch*l.outputs, sizeof(float)); l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
l.forward = forward_detection_layer; l.forward = forward_detection_layer;
l.backward = backward_detection_layer; l.backward = backward_detection_layer;
@ -182,7 +182,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
} }
if(0){ if(0){
float *costs = calloc(l.batch*locations*l.n, sizeof(float)); float* costs = (float*)calloc(l.batch * locations * l.n, sizeof(float));
for (b = 0; b < l.batch; ++b) { for (b = 0; b < l.batch; ++b) {
int index = b*l.inputs; int index = b*l.inputs;
for (i = 0; i < locations; ++i) { for (i = 0; i < locations; ++i) {
@ -259,11 +259,11 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state)
return; return;
} }
float *in_cpu = calloc(l.batch*l.inputs, sizeof(float)); float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
float *truth_cpu = 0; float *truth_cpu = 0;
if(state.truth){ if(state.truth){
int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes); int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes);
truth_cpu = calloc(num_truth, sizeof(float)); truth_cpu = (float*)calloc(num_truth, sizeof(float));
cuda_pull_array(state.truth, truth_cpu, num_truth); cuda_pull_array(state.truth, truth_cpu, num_truth);
} }
cuda_pull_array(state.input, in_cpu, l.batch*l.inputs); cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);

View File

@ -6,6 +6,9 @@
typedef layer detection_layer; typedef layer detection_layer;
#ifdef __cplusplus
extern "C" {
#endif
detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
void forward_detection_layer(const detection_layer l, network_state state); void forward_detection_layer(const detection_layer l, network_state state);
void backward_detection_layer(const detection_layer l, network_state state); void backward_detection_layer(const detection_layer l, network_state state);
@ -17,4 +20,7 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state);
void backward_detection_layer_gpu(detection_layer l, network_state state); void backward_detection_layer_gpu(detection_layer l, network_state state);
#endif #endif
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -9,27 +9,33 @@
#include "option_list.h" #include "option_list.h"
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#include "opencv2/core/core_c.h" #include <opencv2/core/core_c.h>
//#include "opencv2/core/core.hpp" //#include "opencv2/core/core.hpp"
#include "opencv2/core/version.hpp" #include <opencv2/core/version.hpp>
#include "opencv2/imgproc/imgproc_c.h" #include <opencv2/imgproc/imgproc_c.h>
#ifndef CV_VERSION_EPOCH #ifndef CV_VERSION_EPOCH
#include "opencv2/videoio/videoio_c.h" #include <opencv2/videoio/videoio_c.h>
#define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION) #define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION)
#ifndef USE_CMAKE_LIBS
#pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib") #pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib")
#endif // USE_CMAKE_LIBS
#else #else
#define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)"" CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR) #define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)"" CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)
#ifndef USE_CMAKE_LIBS
#pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib") #pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib")
#pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib") #pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib")
#pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib") #pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib")
#endif // USE_CMAKE_LIBS
#endif #endif
IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show); IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches, void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port); float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port);
#endif // OPENCV
#ifndef CV_RGB
#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 ) #define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
#endif // OPENCV #endif // OPENCV
@ -81,7 +87,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
char *base = basecfg(cfgfile); char *base = basecfg(cfgfile);
printf("%s\n", base); printf("%s\n", base);
float avg_loss = -1; float avg_loss = -1;
network *nets = calloc(ngpus, sizeof(network)); network* nets = (network*)calloc(ngpus, sizeof(network));
srand(time(0)); srand(time(0));
int seed = rand(); int seed = rand();
@ -410,8 +416,8 @@ void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int
if (ymax > h) ymax = h; if (ymax > h) ymax = h;
for (j = 0; j < classes; ++j) { for (j = 0; j < classes; ++j) {
int class = j; int myclass = j;
if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[class], if (dets[i].prob[myclass]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[myclass],
xmin, ymin, xmax, ymax); xmin, ymin, xmax, ymax);
} }
} }
@ -465,7 +471,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
} }
else { else {
if (!outfile) outfile = "comp4_det_test_"; if (!outfile) outfile = "comp4_det_test_";
fps = calloc(classes, sizeof(FILE *)); fps = (FILE**)calloc(classes, sizeof(FILE*));
for (j = 0; j < classes; ++j) { for (j = 0; j < classes; ++j) {
snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
fps[j] = fopen(buff, "w"); fps[j] = fopen(buff, "w");
@ -482,11 +488,11 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
int nthreads = 4; int nthreads = 4;
if (m < 4) nthreads = m; if (m < 4) nthreads = m;
image *val = calloc(nthreads, sizeof(image)); image* val = (image*)calloc(nthreads, sizeof(image));
image *val_resized = calloc(nthreads, sizeof(image)); image* val_resized = (image*)calloc(nthreads, sizeof(image));
image *buf = calloc(nthreads, sizeof(image)); image* buf = (image*)calloc(nthreads, sizeof(image));
image *buf_resized = calloc(nthreads, sizeof(image)); image* buf_resized = (image*)calloc(nthreads, sizeof(image));
pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
load_args args = { 0 }; load_args args = { 0 };
args.w = net.w; args.w = net.w;
@ -702,11 +708,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
int nthreads = 4; int nthreads = 4;
if (m < 4) nthreads = m; if (m < 4) nthreads = m;
image *val = calloc(nthreads, sizeof(image)); image* val = (image*)calloc(nthreads, sizeof(image));
image *val_resized = calloc(nthreads, sizeof(image)); image* val_resized = (image*)calloc(nthreads, sizeof(image));
image *buf = calloc(nthreads, sizeof(image)); image* buf = (image*)calloc(nthreads, sizeof(image));
image *buf_resized = calloc(nthreads, sizeof(image)); image* buf_resized = (image*)calloc(nthreads, sizeof(image));
pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
load_args args = { 0 }; load_args args = { 0 };
args.w = net.w; args.w = net.w;
@ -720,11 +726,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
int tp_for_thresh = 0; int tp_for_thresh = 0;
int fp_for_thresh = 0; int fp_for_thresh = 0;
box_prob *detections = calloc(1, sizeof(box_prob)); box_prob* detections = (box_prob*)calloc(1, sizeof(box_prob));
int detections_count = 0; int detections_count = 0;
int unique_truth_count = 0; int unique_truth_count = 0;
int *truth_classes_count = calloc(classes, sizeof(int)); int* truth_classes_count = (int*)calloc(classes, sizeof(int));
for (t = 0; t < nthreads; ++t) { for (t = 0; t < nthreads; ++t) {
args.path = paths[i + t]; args.path = paths[i + t];
@ -798,7 +804,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
float prob = dets[i].prob[class_id]; float prob = dets[i].prob[class_id];
if (prob > 0) { if (prob > 0) {
detections_count++; detections_count++;
detections = realloc(detections, detections_count * sizeof(box_prob)); detections = (box_prob*)realloc(detections, detections_count * sizeof(box_prob));
detections[detections_count - 1].b = dets[i].bbox; detections[detections_count - 1].b = dets[i].bbox;
detections[detections_count - 1].p = prob; detections[detections_count - 1].p = prob;
detections[detections_count - 1].image_index = image_index; detections[detections_count - 1].image_index = image_index;
@ -890,14 +896,14 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
} pr_t; } pr_t;
// for PR-curve // for PR-curve
pr_t **pr = calloc(classes, sizeof(pr_t*)); pr_t** pr = (pr_t**)calloc(classes, sizeof(pr_t*));
for (i = 0; i < classes; ++i) { for (i = 0; i < classes; ++i) {
pr[i] = calloc(detections_count, sizeof(pr_t)); pr[i] = (pr_t*)calloc(detections_count, sizeof(pr_t));
} }
printf("\n detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count); printf("\n detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count);
int *truth_flags = calloc(unique_truth_count, sizeof(int)); int* truth_flags = (int*)calloc(unique_truth_count, sizeof(int));
int rank; int rank;
for (rank = 0; rank < detections_count; ++rank) { for (rank = 0; rank < detections_count; ++rank) {
@ -993,7 +999,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
if (reinforcement_fd != NULL) fclose(reinforcement_fd); if (reinforcement_fd != NULL) fclose(reinforcement_fd);
// free memory // free memory
free_ptrs(names, net.layers[net.n - 1].classes); free_ptrs((void**)names, net.layers[net.n - 1].classes);
free_list_contents_kvp(options); free_list_contents_kvp(options);
free_list(options); free_list(options);
@ -1043,7 +1049,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
} }
//float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 }; //float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 };
float *rel_width_height_array = calloc(1000, sizeof(float)); float* rel_width_height_array = (float*)calloc(1000, sizeof(float));
list *options = read_data_cfg(datacfg); list *options = read_data_cfg(datacfg);
@ -1079,7 +1085,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
if (check_mistakes) getchar(); if (check_mistakes) getchar();
} }
number_of_boxes++; number_of_boxes++;
rel_width_height_array = realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float)); rel_width_height_array = (float*)realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width; rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width;
rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height; rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height;
printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes); printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes);
@ -1104,7 +1110,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
// K-means // K-means
anchors_data = do_kmeans(boxes_data, num_of_clusters); anchors_data = do_kmeans(boxes_data, num_of_clusters);
qsort(anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), anchors_data_comparator); qsort((void*)anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), (__compar_fn_t)anchors_data_comparator);
//gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 //gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66
//float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 }; //float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 };
@ -1285,8 +1291,8 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
layer l = net.layers[net.n - 1]; layer l = net.layers[net.n - 1];
//box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); //box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
//float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); //float **probs = calloc(l.w*l.h*l.n, sizeof(float*));
//for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
float *X = sized.data; float *X = sized.data;
@ -1365,7 +1371,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
} }
// free memory // free memory
free_ptrs(names, net.layers[net.n - 1].classes); free_ptrs((void**)names, net.layers[net.n - 1].classes);
free_list_contents_kvp(options); free_list_contents_kvp(options);
free_list(options); free_list(options);
@ -1421,7 +1427,7 @@ void run_detector(int argc, char **argv)
for (i = 0; i < len; ++i) { for (i = 0; i < len; ++i) {
if (gpu_list[i] == ',') ++ngpus; if (gpu_list[i] == ',') ++ngpus;
} }
gpus = calloc(ngpus, sizeof(int)); gpus = (int*)calloc(ngpus, sizeof(int));
for (i = 0; i < ngpus; ++i) { for (i = 0; i < ngpus; ++i) {
gpus[i] = atoi(gpu_list); gpus[i] = atoi(gpu_list);
gpu_list = strchr(gpu_list, ',') + 1; gpu_list = strchr(gpu_list, ',') + 1;

View File

@ -9,7 +9,7 @@ void train_dice(char *cfgfile, char *weightfile)
srand(time(0)); srand(time(0));
float avg_loss = -1; float avg_loss = -1;
char *base = basecfg(cfgfile); char *base = basecfg(cfgfile);
char *backup_directory = "/home/pjreddie/backup/"; char* backup_directory = "backup/";
printf("%s\n", base); printf("%s\n", base);
network net = parse_network_cfg(cfgfile); network net = parse_network_cfg(cfgfile);
if(weightfile){ if(weightfile){

View File

@ -6,13 +6,13 @@
dropout_layer make_dropout_layer(int batch, int inputs, float probability) dropout_layer make_dropout_layer(int batch, int inputs, float probability)
{ {
dropout_layer l = {0}; dropout_layer l = { (LAYER_TYPE)0 };
l.type = DROPOUT; l.type = DROPOUT;
l.probability = probability; l.probability = probability;
l.inputs = inputs; l.inputs = inputs;
l.outputs = inputs; l.outputs = inputs;
l.batch = batch; l.batch = batch;
l.rand = calloc(inputs*batch, sizeof(float)); l.rand = (float*)calloc(inputs * batch, sizeof(float));
l.scale = 1./(1.-probability); l.scale = 1./(1.-probability);
l.forward = forward_dropout_layer; l.forward = forward_dropout_layer;
l.backward = backward_dropout_layer; l.backward = backward_dropout_layer;
@ -27,7 +27,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
void resize_dropout_layer(dropout_layer *l, int inputs) void resize_dropout_layer(dropout_layer *l, int inputs)
{ {
l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); l->rand = (float*)realloc(l->rand, l->inputs * l->batch * sizeof(float));
#ifdef GPU #ifdef GPU
cuda_free(l->rand_gpu); cuda_free(l->rand_gpu);

View File

@ -6,6 +6,9 @@
typedef layer dropout_layer; typedef layer dropout_layer;
#ifdef __cplusplus
extern "C" {
#endif
dropout_layer make_dropout_layer(int batch, int inputs, float probability); dropout_layer make_dropout_layer(int batch, int inputs, float probability);
void forward_dropout_layer(dropout_layer l, network_state state); void forward_dropout_layer(dropout_layer l, network_state state);
@ -16,5 +19,8 @@ void resize_dropout_layer(dropout_layer *l, int inputs);
void forward_dropout_layer_gpu(dropout_layer l, network_state state); void forward_dropout_layer_gpu(dropout_layer l, network_state state);
void backward_dropout_layer_gpu(dropout_layer l, network_state state); void backward_dropout_layer_gpu(dropout_layer l, network_state state);
#endif
#ifdef __cplusplus
}
#endif #endif
#endif #endif

View File

@ -2,11 +2,9 @@
#include "curand.h" #include "curand.h"
#include "cublas_v2.h" #include "cublas_v2.h"
extern "C" {
#include "dropout_layer.h" #include "dropout_layer.h"
#include "cuda.h" #include "cuda.h"
#include "utils.h" #include "utils.h"
}
__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale)
{ {

View File

@ -7,7 +7,10 @@
#include <math.h> #include <math.h>
#include <float.h> #include <float.h>
#include <string.h> #include <string.h>
#include <stdint.h>
#ifdef _WIN32
#include <intrin.h>
#endif
#if defined(_OPENMP) #if defined(_OPENMP)
#include <omp.h> #include <omp.h>
#endif #endif
@ -37,7 +40,7 @@ void gemm_bin(int M, int N, int K, float ALPHA,
float *random_matrix(int rows, int cols) float *random_matrix(int rows, int cols)
{ {
int i; int i;
float *m = calloc(rows*cols, sizeof(float)); float* m = (float*)calloc(rows * cols, sizeof(float));
for(i = 0; i < rows*cols; ++i){ for(i = 0; i < rows*cols; ++i){
m[i] = (float)rand()/RAND_MAX; m[i] = (float)rand()/RAND_MAX;
} }
@ -83,7 +86,6 @@ void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
// XNOR bitwise GEMM for binary neural network // XNOR bitwise GEMM for binary neural network
//-------------------------------------------- //--------------------------------------------
#include <stdint.h>
static inline unsigned char xnor(unsigned char a, unsigned char b) { static inline unsigned char xnor(unsigned char a, unsigned char b) {
//return a == b; //return a == b;
@ -318,6 +320,7 @@ void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb)
} }
} }
#ifndef GPU
uint8_t reverse_8_bit(uint8_t a) { uint8_t reverse_8_bit(uint8_t a) {
return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16; return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16;
} }
@ -465,6 +468,9 @@ void transpose_bin(char *A, char *B, const int n, const int m,
} }
*/ */
#else
extern void transpose_32x32_bits_reversed_diagonale(uint32_t* A, uint32_t* B, int m, int n);
#endif
// transpose by 32-bit // transpose by 32-bit
void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
@ -483,7 +489,7 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
//transpose_32x32_bits_my(&A[a_index/32], &B[b_index/32], lda/32, ldb/32); //transpose_32x32_bits_my(&A[a_index/32], &B[b_index/32], lda/32, ldb/32);
} }
for (; j < m; ++j) { for (; j < m; ++j) {
if (get_bit(A, i*lda + j)) set_bit(B, j*ldb + i); if (get_bit((const unsigned char* const)A, i * lda + j)) set_bit((unsigned char* const)B, j * ldb + i);
} }
} }
} }
@ -703,7 +709,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
else { else {
for (i = 0; i < M; ++i) { for (i = 0; i < M; ++i) {
for (k = 0; k < K; ++k) { for (k = 0; k < K; ++k) {
register float A_PART = ALPHA*A[i*lda + k]; float A_PART = ALPHA * A[i * lda + k];
for (j = 0; j < N; ++j) { for (j = 0; j < N; ++j) {
C[i*ldc + j] += A_PART*B[k*ldb + j]; C[i*ldc + j] += A_PART*B[k*ldb + j];
} }
@ -730,9 +736,6 @@ void gemm_nn(int M, int N, int K, float ALPHA,
} }
#define TILE_M 4 // 4 ops
#define TILE_N 16 // AVX2 = 2 ops * 8 floats
#define TILE_K 16 // loop
void gemm_nn_fast(int M, int N, int K, float ALPHA, void gemm_nn_fast(int M, int N, int K, float ALPHA,
float *A, int lda, float *A, int lda,
@ -1286,16 +1289,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
} }
static inline float im2col_get_pixel(float *im, int height, int width, int channels,
int row, int col, int channel, int pad)
{
row -= pad;
col -= pad;
if (row < 0 || col < 0 ||
row >= height || col >= width) return 0;
return im[col + width*(row + height*channel)];
}
//From Berkeley Vision's Caffe! //From Berkeley Vision's Caffe!
//https://github.com/BVLC/caffe/blob/master/LICENSE //https://github.com/BVLC/caffe/blob/master/LICENSE
@ -1645,7 +1639,7 @@ void im2col_cpu_custom_bin(float* data_im,
__m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS); __m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS);
uint16_t mask = _mm256_movemask_ps(result256); // (val > 0) ? 0 : 1 uint16_t mask = _mm256_movemask_ps(result256); // (val > 0) ? 0 : 1
uint16_t *dst_ptr = &((unsigned char*)data_col)[col_index / 8]; uint16_t* dst_ptr = &((uint16_t*)data_col)[col_index / 8];
*dst_ptr |= (mask << (col_index % 8)); *dst_ptr |= (mask << (col_index % 8));
} }
@ -1657,7 +1651,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
float val = data_im[im_col + width*(im_row + height*c_im)]; float val = data_im[im_col + width*(im_row + height*c_im)];
if(val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char* const)data_col, col_index);
} }
} }
@ -1671,7 +1665,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char* const)data_col, col_index);
} }
} }
@ -1685,7 +1679,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char* const)data_col, col_index);
} }
} }
@ -1699,7 +1693,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char* const)data_col, col_index);
} }
} }
@ -1713,7 +1707,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char* const)data_col, col_index);
} }
} }
} }
@ -1952,7 +1946,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
int i, j, k; int i, j, k;
for (i = 0; i < M; ++i) { for (i = 0; i < M; ++i) {
for (k = 0; k < K; ++k) { for (k = 0; k < K; ++k) {
register float A_PART = ALPHA*A[i*lda + k]; float A_PART = ALPHA * A[i * lda + k];
for (j = 0; j < N; ++j) { for (j = 0; j < N; ++j) {
C[i*ldc + j] += A_PART*B[k*ldb + j]; C[i*ldc + j] += A_PART*B[k*ldb + j];
} }
@ -2239,7 +2233,7 @@ void im2col_cpu_custom_bin(float* data_im,
int col_index = c * new_ldb + h * width_col + w; int col_index = c * new_ldb + h * width_col + w;
float val = data_im[im_col + width*(im_row + height*c_im)]; float val = data_im[im_col + width*(im_row + height*c_im)];
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char*)data_col, col_index);
} }
for (; w < width_col - pad; ++w) { for (; w < width_col - pad; ++w) {
@ -2250,7 +2244,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
float val = data_im[im_col + width*(im_row + height*c_im)]; float val = data_im[im_col + width*(im_row + height*c_im)];
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char*)data_col, col_index);
} }
} }
@ -2264,7 +2258,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char*)data_col, col_index);
} }
} }
@ -2278,7 +2272,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char*)data_col, col_index);
} }
} }
@ -2292,7 +2286,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char*)data_col, col_index);
} }
} }
@ -2306,7 +2300,7 @@ void im2col_cpu_custom_bin(float* data_im,
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
if (val > 0) set_bit(data_col, col_index); if (val > 0) set_bit((unsigned char*)data_col, col_index);
} }
} }
} }
@ -2346,7 +2340,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size)
memset(dst, 0, dst_size); memset(dst, 0, dst_size);
size_t i; size_t i;
char *byte_arr = calloc(size, sizeof(char)); char* byte_arr = (char*)calloc(size, sizeof(char));
for (i = 0; i < size; ++i) { for (i = 0; i < size; ++i) {
if (src[i] > 0) byte_arr[i] = 1; if (src[i] > 0) byte_arr[i] = 1;
} }
@ -2578,7 +2572,7 @@ void gemm_nt(int M, int N, int K, float ALPHA,
int i,j,k; int i,j,k;
for(i = 0; i < M; ++i){ for(i = 0; i < M; ++i){
for(j = 0; j < N; ++j){ for(j = 0; j < N; ++j){
register float sum = 0; float sum = 0;
for(k = 0; k < K; ++k){ for(k = 0; k < K; ++k){
sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; sum += ALPHA*A[i*lda+k]*B[j*ldb + k];
} }
@ -2595,7 +2589,7 @@ void gemm_tn(int M, int N, int K, float ALPHA,
int i,j,k; int i,j,k;
for(i = 0; i < M; ++i){ for(i = 0; i < M; ++i){
for(k = 0; k < K; ++k){ for(k = 0; k < K; ++k){
register float A_PART = ALPHA*A[k*lda+i]; float A_PART = ALPHA * A[k * lda + i];
for(j = 0; j < N; ++j){ for(j = 0; j < N; ++j){
C[i*ldc+j] += A_PART*B[k*ldb+j]; C[i*ldc+j] += A_PART*B[k*ldb+j];
} }
@ -2611,7 +2605,7 @@ void gemm_tt(int M, int N, int K, float ALPHA,
int i,j,k; int i,j,k;
for(i = 0; i < M; ++i){ for(i = 0; i < M; ++i){
for(j = 0; j < N; ++j){ for(j = 0; j < N; ++j){
register float sum = 0; float sum = 0;
for(k = 0; k < K; ++k){ for(k = 0; k < K; ++k){
sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; sum += ALPHA*A[i+k*lda]*B[k+j*ldb];
} }
@ -2668,9 +2662,9 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
float *C_gpu, int ldc) float *C_gpu, int ldc)
{ {
cublasHandle_t handle = blas_handle(); cublasHandle_t handle = blas_handle();
cudaError_t stream_status = cublasSetStream(handle, get_cuda_stream()); cudaError_t stream_status = (cudaError_t)cublasSetStream(handle, get_cuda_stream());
CHECK_CUDA(stream_status); CHECK_CUDA(stream_status);
cudaError_t status = cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), cudaError_t status = (cudaError_t)cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N),
(TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
CHECK_CUDA(status); CHECK_CUDA(status);
} }

View File

@ -3,6 +3,9 @@
#include "activations.h" #include "activations.h"
#include <stdint.h> #include <stdint.h>
#include <stddef.h> #include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
float *weights, float *input, float *output, float *mean); float *weights, float *input, float *output, float *mean);
@ -56,6 +59,7 @@ void im2col_cpu_custom_transpose(float* data_im,
void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a); void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a);
LIB_API void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n);
void gemm_bin(int M, int N, int K, float ALPHA, void gemm_bin(int M, int N, int K, float ALPHA,
char *A, int lda, char *A, int lda,
@ -109,4 +113,7 @@ void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
float BETA, float BETA,
float *C, int ldc); float *C, int ldc);
#endif #endif
#ifdef __cplusplus
}
#endif
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@ -1,133 +1,228 @@
/* Declarations for getopt. #ifdef _WIN32
Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc. #ifndef __GETOPT_H__
/**
* DISCLAIMER
* This file is part of the mingw-w64 runtime package.
*
* The mingw-w64 runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
/*
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Dieter Baron and Thomas Klausner.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
This file is part of the GNU C Library. Its master source is NOT part of #define __GETOPT_H__
the C library, however. The master source lives in /gd/gnu/lib.
The GNU C Library is free software; you can redistribute it and/or /* All the headers include this file. */
modify it under the terms of the GNU Library General Public License as #include <crtdefs.h>
published by the Free Software Foundation; either version 2 of the #include <errno.h>
License, or (at your option) any later version. #include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
The GNU C Library is distributed in the hope that it will be useful, #ifdef __cplusplus
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#ifndef _GETOPT_H
#define _GETOPT_H 1
#ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
/* For communication from `getopt' to the caller. #define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
When `getopt' finds an option that takes an argument,
the argument value is returned here.
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
extern char *optarg; //extern int optind; /* index of first non-option in argv */
//extern int optopt; /* single option character, as parsed */
//extern int opterr; /* flag to enable built-in diagnostics... */
// /* (user may set to zero, to suppress) */
//
//extern char *optarg; /* pointer to argument of current option */
/* Index in ARGV of the next element to be scanned. #define PRINT_ERROR ((opterr) && (*options != ':'))
This is used for communication to and from the caller
and for communication between successive calls to `getopt'.
On entry to `getopt', zero means this is the first call; initialize. #define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
When `getopt' returns EOF, this is the index of the first of the /* return values */
non-option elements that the caller should itself scan. #define BADCH (int)'?'
#define BADARG ((*options == ':') ? (int)':' : (int)'?')
#define INORDER (int)1
Otherwise, `optind' communicates from one call to the next #ifndef __CYGWIN__
how much of ARGV has been scanned so far. */ #define __progname __argv[0]
extern int optind;
/* Callers store zero here to inhibit the error message `getopt' prints
for unrecognized options. */
extern int opterr;
/* Set to an option character which was unrecognized. */
extern int optopt;
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
zero.
The field `has_arg' is:
no_argument (or 0) if the option does not take an argument,
required_argument (or 1) if the option requires an argument,
optional_argument (or 2) if the option takes an optional argument.
If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
To have a long-named option do something other than set an `int' to
a compiled-in constant, such as set a value from `optarg', set the
option's `flag' field to zero and its `val' field to a nonzero
value (the equivalent single-letter option character, if there is
one). For long options that have a zero `flag' field, `getopt'
returns the contents of the `val' field. */
struct option
{
#if defined (__STDC__) && __STDC__
const char *name;
#else #else
char *name; extern char __declspec(dllimport) * __progname;
#endif #endif
/* has_arg can't be an enum because some compilers complain about
type mismatches in all the code that assumes it is an int. */
int has_arg;
int *flag;
int val;
};
/* Names for the values of the `has_arg' field of `struct option'. */ #ifdef __CYGWIN__
static char EMSG[] = "";
#else
#define EMSG ""
#endif
#define no_argument 0 static int getopt_internal(int, char* const*, const char*,
#define required_argument 1 const struct option*, int*, int);
#define optional_argument 2 static int parse_long_options(char* const*, const char*,
const struct option*, int*, int);
static int gcd(int, int);
static void permute_args(int, int, int, char* const*);
#if defined (__STDC__) && __STDC__ static char* place = EMSG; /* option letter processing */
#ifdef __GNU_LIBRARY__
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int argc, char *const *argv, const char *shortopts);
#else /* not __GNU_LIBRARY__ */
extern int getopt ();
#endif /* __GNU_LIBRARY__ */
extern int getopt_long (int argc, char *const *argv, const char *shortopts,
const struct option *longopts, int *longind);
extern int getopt_long_only (int argc, char *const *argv,
const char *shortopts,
const struct option *longopts, int *longind);
/* Internal only. Users should not call this directly. */ /* XXX: set optreset to 1 rather than these two */
extern int _getopt_internal (int argc, char *const *argv, static int nonopt_start = -1; /* first non option argument (for permute) */
const char *shortopts, static int nonopt_end = -1; /* first option after non options (for permute) */
const struct option *longopts, int *longind,
int long_only);
#else /* not __STDC__ */
extern int getopt ();
extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal (); /* Error messages */
#endif /* __STDC__ */ static const char recargchar[] = "option requires an argument -- %c";
static const char recargstring[] = "option requires an argument -- %s";
static const char ambig[] = "ambiguous option -- %.*s";
static const char noarg[] = "option doesn't take an argument -- %.*s";
static const char illoptchar[] = "unknown option -- %c";
static const char illoptstring[] = "unknown option -- %s";
#ifdef __cplusplus static void _vwarnx(const char* fmt, va_list ap);
static void warnx(const char* fmt, ...);
/*
* Compute the greatest common divisor of a and b.
*/
static int gcd(int a, int b);
/*
* Exchange the block from nonopt_start to nonopt_end with the block
* from nonopt_end to opt_end (keeping the same order of arguments
* in each block).
*/
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv);
#ifdef REPLACE_GETOPT
/*
* getopt --
* Parse argc/argv argument vector.
*
* [eventually this will replace the BSD getopt]
*/
int getopt(int nargc, char* const* nargv, const char* options);
#endif /* REPLACE_GETOPT */
//extern int getopt(int nargc, char * const *nargv, const char *options);
#ifdef _BSD_SOURCE
/*
* BSD adds the non-standard `optreset' feature, for reinitialisation
* of `getopt' parsing. We support this feature, for applications which
* proclaim their BSD heritage, before including this header; however,
* to maintain portability, developers are advised to avoid it.
*/
#define optreset __mingw_optreset
extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
* POSIX requires the `getopt' API to be specified in `unistd.h';
* thus, `unistd.h' includes this header. However, we do not want
* to expose the `getopt_long' or `getopt_long_only' APIs, when
* included in this manner. Thus, close the standard __GETOPT_H__
* declarations block, and open an additional __GETOPT_LONG_H__
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
#ifdef __cplusplus
extern "C" {
#endif
/*
* parse_long_options --
* Parse long options in argc/argv argument vector.
* Returns -1 if short_too is set and the option does not match long_options.
*/
static int parse_long_options(char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too);
/*
* getopt_internal --
* Parse argc/argv argument vector. Called by user level routines.
*/
static int getopt_internal(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags);
/*
* getopt_long --
* Parse argc/argv argument vector.
*/
int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
/*
* getopt_long_only --
* Parse argc/argv argument vector.
*/
int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
/*
* Previous MinGW implementation had...
*/
#ifndef HAVE_DECL_GETOPT
/*
* ...for the long form API only; keep this for compatibility.
*/
#define HAVE_DECL_GETOPT 1
#endif
#ifdef __cplusplus
} }
#endif #endif
#endif /* _GETOPT_H */ #endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
#endif

View File

@ -1,49 +1,43 @@
#ifdef _WIN32
#include "gettimeofday.h" #include "gettimeofday.h"
int gettimeofday(struct timeval *tv, struct timezone *tz) LIB_API int gettimeofday(struct timeval* tp, struct timezone* tzp)
{ {
FILETIME ft; static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL);
unsigned __int64 tmpres = 0; SYSTEMTIME system_time;
static int tzflag; FILETIME file_time;
uint64_t time;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
GetSystemTime(&system_time);
SystemTimeToFileTime(&system_time, &file_time);
time = ((uint64_t)file_time.dwLowDateTime);
time += ((uint64_t)file_time.dwHighDateTime) << 32;
/*converting file time to unix epoch*/ /*converting file time to unix epoch*/
tmpres -= DELTA_EPOCH_IN_MICROSECS; tp->tv_sec = (long)((time - EPOCH) / 10000000L);
tmpres /= 10; /*convert into microseconds*/ tp->tv_usec = (long)(system_time.wMilliseconds * 1000);
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
if (NULL != tz)
{
if (!tzflag)
{
_tzset();
tzflag++;
}
tz->tz_minuteswest = _timezone / 60;
tz->tz_dsttime = _daylight;
}
return 0; return 0;
}
LIB_API int clock_gettime(int dummy, struct timespec* ct)
{
LARGE_INTEGER count;
if (g_first_time) {
g_first_time = 0;
if (0 == QueryPerformanceFrequency(&g_counts_per_sec)) {
g_counts_per_sec.QuadPart = 0;
}
}
if ((NULL == ct) || (g_counts_per_sec.QuadPart <= 0) || (0 == QueryPerformanceCounter(&count))) {
return -1;
} }
/* never worry about timersub type activies again -- from GLIBC and upcased. */ ct->tv_sec = count.QuadPart / g_counts_per_sec.QuadPart;
int timersub(struct timeval *a, struct timeval *b, struct timeval *result) ct->tv_nsec = ((count.QuadPart % g_counts_per_sec.QuadPart) * BILLION) / g_counts_per_sec.QuadPart;
{
(result)->tv_sec = (a)->tv_sec - (b)->tv_sec;
(result)->tv_usec = (a)->tv_usec - (b)->tv_usec;
if ((result)->tv_usec < 0) {
--(result)->tv_sec;
(result)->tv_usec += 1000000;
}
return 0; return 0;
} }
#endif

View File

@ -1,20 +1,39 @@
#pragma once #ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <Winsock2.h>
#include <stdint.h>
#include < time.h > #include < time.h >
#include <windows.h> //I've ommited this line. #include "darknet.h"
#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 #define CLOCK_REALTIME (1)
#else #define BILLION (1E9)
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
/*
 * timersub(a, b, result) -- fallback definition, used only when the
 * platform headers have not already provided a timersub macro.
 *
 * Stores the difference (*a - *b) into *result. All three arguments are
 * pointers to objects with tv_sec and tv_usec members. If the microsecond
 * difference is negative, one second is borrowed from tv_sec and 1000000
 * is added to tv_usec so that tv_usec ends up non-negative.
 *
 * Each argument appears several times in the expansion, so expressions
 * with side effects must not be passed. The do { ... } while (0) wrapper
 * lets the macro be used as a single statement followed by a semicolon.
 */
#ifndef timersub
#define timersub(a, b, result) \
do { \
(result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
(result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
if ((result)->tv_usec < 0) { \
--(result)->tv_sec; \
(result)->tv_usec += 1000000; \
} \
} while (0)
#endif // timersub
#ifdef __cplusplus
extern "C" {
#endif #endif
struct timezone static unsigned char g_first_time = 1;
{ static LARGE_INTEGER g_counts_per_sec;
int tz_minuteswest; /* minutes W of Greenwich */
int tz_dsttime; /* type of dst correction */ LIB_API int gettimeofday(struct timeval*, struct timezone*);
}; LIB_API int clock_gettime(int, struct timespec*);
int gettimeofday(struct timeval *tv, struct timezone *tz); #ifdef __cplusplus
}
#endif
#endif
/* never worry about timersub type activities again -- from GLIBC and upcased. */
int timersub(struct timeval *a, struct timeval *b, struct timeval *result);

View File

@ -5,13 +5,12 @@
#include "blas.h" #include "blas.h"
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#endif #endif
int inverted = 1; int inverted = 1;
int noi = 1; int noi = 1;
//static const int nind = 5; static const unsigned int n_ind = 5;
#define nind 5
typedef struct { typedef struct {
char **data; char **data;
@ -22,7 +21,7 @@ char *fgetgo(FILE *fp)
{ {
if(feof(fp)) return 0; if(feof(fp)) return 0;
size_t size = 94; size_t size = 94;
char *line = malloc(size*sizeof(char)); char* line = (char*)malloc(size * sizeof(char));
if(size != fread(line, sizeof(char), size, fp)){ if(size != fread(line, sizeof(char), size, fp)){
free(line); free(line);
return 0; return 0;
@ -35,21 +34,21 @@ moves load_go_moves(char *filename)
{ {
moves m; moves m;
m.n = 128; m.n = 128;
m.data = calloc(128, sizeof(char*)); m.data = (char**)calloc(128, sizeof(char*));
FILE *fp = fopen(filename, "rb"); FILE *fp = fopen(filename, "rb");
int count = 0; int count = 0;
char *line = 0; char *line = 0;
while((line = fgetgo(fp))){ while((line = fgetgo(fp))){
if(count >= m.n){ if(count >= m.n){
m.n *= 2; m.n *= 2;
m.data = realloc(m.data, m.n*sizeof(char*)); m.data = (char**)realloc(m.data, m.n * sizeof(char*));
} }
m.data[count] = line; m.data[count] = line;
++count; ++count;
} }
printf("%d\n", count); printf("%d\n", count);
m.n = count; m.n = count;
m.data = realloc(m.data, count*sizeof(char*)); m.data = (char**)realloc(m.data, count * sizeof(char*));
return m; return m;
} }
@ -127,12 +126,12 @@ void train_go(char *cfgfile, char *weightfile)
} }
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
char *backup_directory = "/home/pjreddie/backup/"; char* backup_directory = "backup/";
char buff[256]; char buff[256];
float *board = calloc(19*19*net.batch, sizeof(float)); float* board = (float*)calloc(19 * 19 * net.batch, sizeof(float));
float *move = calloc(19*19*net.batch, sizeof(float)); float* move = (float*)calloc(19 * 19 * net.batch, sizeof(float));
moves m = load_go_moves("/home/pjreddie/backup/go.train"); moves m = load_go_moves("backup/go.train");
//moves m = load_go_moves("games.txt"); //moves m = load_go_moves("games.txt");
int N = m.n; int N = m.n;
@ -187,7 +186,7 @@ void propagate_liberty(float *board, int *lib, int *visited, int row, int col, i
int *calculate_liberties(float *board) int *calculate_liberties(float *board)
{ {
int *lib = calloc(19*19, sizeof(int)); int* lib = (int*)calloc(19 * 19, sizeof(int));
int visited[361]; int visited[361];
int i, j; int i, j;
for(j = 0; j < 19; ++j){ for(j = 0; j < 19; ++j){
@ -222,7 +221,7 @@ void print_board(float *board, int swap, int *indexes)
int index = j*19 + i; int index = j*19 + i;
if(indexes){ if(indexes){
int found = 0; int found = 0;
for(n = 0; n < nind; ++n){ for (n = 0; n < n_ind; ++n) {
if(index == indexes[n]){ if(index == indexes[n]){
found = 1; found = 1;
/* /*
@ -365,9 +364,9 @@ int generate_move(network net, int player, float *board, int multi, float thresh
} }
} }
int indexes[nind]; int indexes[n_ind];
top_k(move, 19*19, nind, indexes); top_k(move, 19*19, n_ind, indexes);
if(thresh > move[indexes[0]]) thresh = move[indexes[nind-1]]; if(thresh > move[indexes[0]]) thresh = move[indexes[n_ind-1]];
for(i = 0; i < 19; ++i){ for(i = 0; i < 19; ++i){
for(j = 0; j < 19; ++j){ for(j = 0; j < 19; ++j){
@ -382,12 +381,12 @@ int generate_move(network net, int player, float *board, int multi, float thresh
int index = sample_array(move, 19*19); int index = sample_array(move, 19*19);
if(print){ if(print){
top_k(move, 19*19, nind, indexes); top_k(move, 19*19, n_ind, indexes);
for(i = 0; i < nind; ++i){ for(i = 0; i < n_ind; ++i){
if (!move[indexes[i]]) indexes[i] = -1; if (!move[indexes[i]]) indexes[i] = -1;
} }
print_board(board, player, indexes); print_board(board, player, indexes);
for(i = 0; i < nind; ++i){ for(i = 0; i < n_ind; ++i){
fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]); fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]);
} }
} }
@ -411,9 +410,9 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
set_batch_network(&net, 1); set_batch_network(&net, 1);
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
float *board = calloc(19*19, sizeof(float)); float* board = (float*)calloc(19 * 19, sizeof(float));
float *move = calloc(19*19, sizeof(float)); float* move = (float*)calloc(19 * 19, sizeof(float));
moves m = load_go_moves("/home/pjreddie/backup/go.test"); moves m = load_go_moves("backup/go.test");
int N = m.n; int N = m.n;
int i; int i;
@ -439,9 +438,9 @@ void engine_go(char *filename, char *weightfile, int multi)
} }
srand(time(0)); srand(time(0));
set_batch_network(&net, 1); set_batch_network(&net, 1);
float *board = calloc(19*19, sizeof(float)); float* board = (float*)calloc(19 * 19, sizeof(float));
char *one = calloc(91, sizeof(char)); char* one = (char*)calloc(91, sizeof(char));
char *two = calloc(91, sizeof(char)); char* two = (char*)calloc(91, sizeof(char));
int passed = 0; int passed = 0;
while(1){ while(1){
char buff[256]; char buff[256];
@ -612,8 +611,8 @@ void test_go(char *cfg, char *weights, int multi)
} }
srand(time(0)); srand(time(0));
set_batch_network(&net, 1); set_batch_network(&net, 1);
float *board = calloc(19*19, sizeof(float)); float* board = (float*)calloc(19 * 19, sizeof(float));
float *move = calloc(19*19, sizeof(float)); float* move = (float*)calloc(19 * 19, sizeof(float));
int color = 1; int color = 1;
while(1){ while(1){
float *output = network_predict(net, board); float *output = network_predict(net, board);
@ -642,11 +641,11 @@ void test_go(char *cfg, char *weights, int multi)
if(board[i]) move[i] = 0; if(board[i]) move[i] = 0;
} }
int indexes[nind]; int indexes[n_ind];
int row, col; int row, col;
top_k(move, 19*19, nind, indexes); top_k(move, 19 * 19, n_ind, indexes);
print_board(board, color, indexes); print_board(board, color, indexes);
for(i = 0; i < nind; ++i){ for (i = 0; i < n_ind; ++i) {
int index = indexes[i]; int index = indexes[i];
row = index / 19; row = index / 19;
col = index % 19; col = index % 19;
@ -664,7 +663,7 @@ void test_go(char *cfg, char *weights, int multi)
int cnum = sscanf(line, "%c", &c); int cnum = sscanf(line, "%c", &c);
if (strlen(line) == 0 || dnum) { if (strlen(line) == 0 || dnum) {
--picked; --picked;
if (picked < nind){ if (picked < n_ind){
int index = indexes[picked]; int index = indexes[picked];
row = index / 19; row = index / 19;
col = index % 19; col = index % 19;
@ -764,9 +763,9 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
int count = 0; int count = 0;
set_batch_network(&net, 1); set_batch_network(&net, 1);
set_batch_network(&net2, 1); set_batch_network(&net2, 1);
float *board = calloc(19*19, sizeof(float)); float* board = (float*)calloc(19 * 19, sizeof(float));
char *one = calloc(91, sizeof(char)); char* one = (char*)calloc(91, sizeof(char));
char *two = calloc(91, sizeof(char)); char* two = (char*)calloc(91, sizeof(char));
int done = 0; int done = 0;
int player = 1; int player = 1;
int p1 = 0; int p1 = 0;

View File

@ -30,42 +30,42 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
{ {
fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs);
batch = batch / steps; batch = batch / steps;
layer l = {0}; layer l = { (LAYER_TYPE)0 };
l.batch = batch; l.batch = batch;
l.type = GRU; l.type = GRU;
l.steps = steps; l.steps = steps;
l.inputs = inputs; l.inputs = inputs;
l.input_z_layer = malloc(sizeof(layer)); l.input_z_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.input_z_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); *(l.input_z_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
l.input_z_layer->batch = batch; l.input_z_layer->batch = batch;
l.state_z_layer = malloc(sizeof(layer)); l.state_z_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.state_z_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); *(l.state_z_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
l.state_z_layer->batch = batch; l.state_z_layer->batch = batch;
l.input_r_layer = malloc(sizeof(layer)); l.input_r_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.input_r_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); *(l.input_r_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
l.input_r_layer->batch = batch; l.input_r_layer->batch = batch;
l.state_r_layer = malloc(sizeof(layer)); l.state_r_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.state_r_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); *(l.state_r_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
l.state_r_layer->batch = batch; l.state_r_layer->batch = batch;
l.input_h_layer = malloc(sizeof(layer)); l.input_h_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.input_h_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); *(l.input_h_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
l.input_h_layer->batch = batch; l.input_h_layer->batch = batch;
l.state_h_layer = malloc(sizeof(layer)); l.state_h_layer = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.state_h_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); *(l.state_h_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
l.state_h_layer->batch = batch; l.state_h_layer->batch = batch;
@ -74,16 +74,16 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
l.outputs = outputs; l.outputs = outputs;
l.output = calloc(outputs*batch*steps, sizeof(float)); l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
l.delta = calloc(outputs*batch*steps, sizeof(float)); l.delta = (float*)calloc(outputs * batch * steps, sizeof(float));
l.state = calloc(outputs*batch, sizeof(float)); l.state = (float*)calloc(outputs * batch, sizeof(float));
l.prev_state = calloc(outputs*batch, sizeof(float)); l.prev_state = (float*)calloc(outputs * batch, sizeof(float));
l.forgot_state = calloc(outputs*batch, sizeof(float)); l.forgot_state = (float*)calloc(outputs * batch, sizeof(float));
l.forgot_delta = calloc(outputs*batch, sizeof(float)); l.forgot_delta = (float*)calloc(outputs * batch, sizeof(float));
l.r_cpu = calloc(outputs*batch, sizeof(float)); l.r_cpu = (float*)calloc(outputs * batch, sizeof(float));
l.z_cpu = calloc(outputs*batch, sizeof(float)); l.z_cpu = (float*)calloc(outputs * batch, sizeof(float));
l.h_cpu = calloc(outputs*batch, sizeof(float)); l.h_cpu = (float*)calloc(outputs * batch, sizeof(float));
l.forward = forward_gru_layer; l.forward = forward_gru_layer;
l.backward = backward_gru_layer; l.backward = backward_gru_layer;

View File

@ -6,6 +6,9 @@
#include "layer.h" #include "layer.h"
#include "network.h" #include "network.h"
#ifdef __cplusplus
extern "C" {
#endif
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize); layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
void forward_gru_layer(layer l, network_state state); void forward_gru_layer(layer l, network_state state);
@ -20,5 +23,8 @@ void push_gru_layer(layer l);
void pull_gru_layer(layer l); void pull_gru_layer(layer l);
#endif #endif
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -1,3 +1,4 @@
#include "image.h"
#include "http_stream.h" #include "http_stream.h"
#ifdef OPENCV #ifdef OPENCV
@ -17,9 +18,10 @@ using std::endl;
// socket related abstractions: // socket related abstractions:
// //
#ifdef _WIN32 #ifdef _WIN32
#ifndef USE_CMAKE_LIBS
#pragma comment(lib, "ws2_32.lib") #pragma comment(lib, "ws2_32.lib")
#include <winsock.h> #endif
#include <windows.h> #include "gettimeofday.h"
#include <time.h> #include <time.h>
#define PORT unsigned long #define PORT unsigned long
#define ADDRPOINTER int* #define ADDRPOINTER int*
@ -44,7 +46,7 @@ static int close_socket(SOCKET s) {
return result; return result;
} }
#else // nix #else // nix
#include <unistd.h> #include "darkunistd.h"
#include <sys/time.h> #include <sys/time.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/socket.h> #include <sys/socket.h>
@ -85,16 +87,15 @@ static int close_socket(SOCKET s) {
#endif // _WIN32 #endif // _WIN32
#include "opencv2/opencv.hpp" #include <opencv2/opencv.hpp>
#include "opencv2/highgui/highgui.hpp" #include <opencv2/highgui/highgui.hpp>
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#include "opencv2/imgproc/imgproc_c.h" #include <opencv2/imgproc/imgproc_c.h>
#ifndef CV_VERSION_EPOCH #ifndef CV_VERSION_EPOCH
#include "opencv2/videoio/videoio.hpp" #include <opencv2/videoio/videoio.hpp>
#endif #endif
using namespace cv; using namespace cv;
#include "image.h"
class MJPG_sender class MJPG_sender
@ -195,7 +196,8 @@ public:
std::vector<int> params; std::vector<int> params;
params.push_back(IMWRITE_JPEG_QUALITY); params.push_back(IMWRITE_JPEG_QUALITY);
params.push_back(quality); params.push_back(quality);
cv::imencode(".jpg", frame, outbuf, params); //cv::imencode(".jpg", frame, outbuf, params); //REMOVED FOR COMPATIBILITY
std::cerr << "cv::imencode call disabled!" << std::endl;
size_t outlen = outbuf.size(); size_t outlen = outbuf.size();
#ifdef _WIN32 #ifdef _WIN32
@ -227,17 +229,17 @@ public:
} }
maxfd = (maxfd>client ? maxfd : client); maxfd = (maxfd>client ? maxfd : client);
FD_SET(client, &master); FD_SET(client, &master);
_write(client, "HTTP/1.0 200 OK\r\n", 0); _write(client, "HTTP/1.0 200 OK\n", 0);
_write(client, _write(client,
"Server: Mozarella/2.2\r\n" "Server: Mozarella/2.2\n"
"Accept-Range: bytes\r\n" "Accept-Range: bytes\n"
"Connection: close\r\n" "Connection: close\n"
"Max-Age: 0\r\n" "Max-Age: 0\n"
"Expires: 0\r\n" "Expires: 0\n"
"Cache-Control: no-cache, private\r\n" "Cache-Control: no-cache, private\n"
"Pragma: no-cache\r\n" "Pragma: no-cache\n"
"Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n" "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\n"
"\r\n", 0); "\n", 0);
cerr << "MJPG_sender: new client " << client << endl; cerr << "MJPG_sender: new client " << client << endl;
} }
else // existing client, just stream pix else // existing client, just stream pix
@ -249,7 +251,7 @@ public:
} }
char head[400]; char head[400];
sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); sprintf(head, "--mjpegstream\nContent-Type: image/jpeg\nContent-Length: %zu\n\n", outlen);
_write(s, head, 0); _write(s, head, 0);
int n = _write(s, (char*)(&outbuf[0]), outlen); int n = _write(s, (char*)(&outbuf[0]), outlen);
//cerr << "known client " << s << " " << n << endl; //cerr << "known client " << s << " " << n << endl;
@ -406,18 +408,18 @@ public:
} }
maxfd = (maxfd>client ? maxfd : client); maxfd = (maxfd>client ? maxfd : client);
FD_SET(client, &master); FD_SET(client, &master);
_write(client, "HTTP/1.0 200 OK\r\n", 0); _write(client, "HTTP/1.0 200 OK\n", 0);
_write(client, _write(client,
"Server: Mozarella/2.2\r\n" "Server: Mozarella/2.2\n"
"Accept-Range: bytes\r\n" "Accept-Range: bytes\n"
"Connection: close\r\n" "Connection: close\n"
"Max-Age: 0\r\n" "Max-Age: 0\n"
"Expires: 0\r\n" "Expires: 0\n"
"Cache-Control: no-cache, private\r\n" "Cache-Control: no-cache, private\n"
"Pragma: no-cache\r\n" "Pragma: no-cache\n"
"Content-Type: application/json\r\n" "Content-Type: application/json\n"
//"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n"
"\r\n", 0); "\n", 0);
_write(client, "[\n", 0); // open JSON array _write(client, "[\n", 0); // open JSON array
int n = _write(client, outputbuf, outlen); int n = _write(client, outputbuf, outlen);
cerr << "JSON_sender: new client " << client << endl; cerr << "JSON_sender: new client " << client << endl;
@ -473,7 +475,7 @@ void send_json(detection *dets, int nboxes, int classes, char **names, long long
// ---------------------------------------- // ----------------------------------------
CvCapture* get_capture_video_stream(char *path) { CvCapture* get_capture_video_stream(const char *path) {
CvCapture* cap = NULL; CvCapture* cap = NULL;
try { try {
cap = (CvCapture*)new cv::VideoCapture(path); cap = (CvCapture*)new cv::VideoCapture(path);
@ -511,7 +513,7 @@ IplImage* get_webcam_frame(CvCapture *cap) {
src = cvCloneImage(&tmp); src = cvCloneImage(&tmp);
} }
else { else {
std::cout << " Video-stream stoped! \n"; std::cout << " Video-stream stopped! \n";
} }
} }
catch (...) { catch (...) {
@ -536,9 +538,6 @@ int get_stream_fps_cpp(CvCapture *cap) {
return fps; return fps;
} }
// ---------------------------------------- // ----------------------------------------
extern "C" {
image ipl_to_image(IplImage* src); // image.c
}
image image_data_augmentation(IplImage* ipl, int w, int h, image image_data_augmentation(IplImage* ipl, int w, int h,
int pleft, int ptop, int swidth, int sheight, int flip, int pleft, int ptop, int swidth, int sheight, int flip,
@ -701,4 +700,4 @@ void stop_timer_and_show() {
} }
void stop_timer_and_show_name(char *name) { stop_timer_and_show(); } void stop_timer_and_show_name(char *name) { stop_timer_and_show(); }
void total_time() {} void total_time() {}
#endif // C++11 #endif // C++11

View File

@ -1,11 +1,14 @@
#pragma once
#ifndef HTTP_STREAM_H #ifndef HTTP_STREAM_H
#define HTTP_STREAM_H #define HTTP_STREAM_H
#include "darknet.h" #include "darknet.h"
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/core/version.hpp>
#include "opencv2/imgproc/imgproc_c.h" #include <opencv2/highgui/highgui_c.h>
#include <opencv2/imgproc/imgproc_c.h>
#ifndef CV_VERSION_EPOCH
#include <opencv2/videoio/videoio_c.h>
#endif
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
@ -18,7 +21,7 @@ extern "C" {
void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout); void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout);
void send_mjpeg(IplImage* ipl, int port, int timeout, int quality); void send_mjpeg(IplImage* ipl, int port, int timeout, int quality);
CvCapture* get_capture_webcam(int index); CvCapture* get_capture_webcam(int index);
CvCapture* get_capture_video_stream(char *path); CvCapture* get_capture_video_stream(const char *path);
IplImage* get_webcam_frame(CvCapture *cap); IplImage* get_webcam_frame(CvCapture *cap);
int get_stream_fps_cpp(CvCapture *cap); int get_stream_fps_cpp(CvCapture *cap);

View File

@ -5,9 +5,14 @@
#include <stdint.h> #include <stdint.h>
#include "darknet.h" #include "darknet.h"
#ifdef __cplusplus
extern "C" {
#endif
void im2col_cpu(float* data_im, void im2col_cpu(float* data_im,
int channels, int height, int width, int channels, int height, int width,
int ksize, int stride, int pad, float* data_col); int ksize, int stride, int pad, float* data_col);
float im2col_get_pixel(float* im, int height, int width, int channels,
int row, int col, int channel, int pad);
#ifdef GPU #ifdef GPU
@ -63,5 +68,8 @@ void convolve_bin_cpu(float *input, float *weights, float *output, int in_w, int
void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad); void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
#endif
#ifdef __cplusplus
}
#endif #endif
#endif #endif

View File

@ -1,19 +1,15 @@
#include "cuda_runtime.h" #include <cuda_runtime.h>
#include "curand.h" #include <curand.h>
#include "cublas_v2.h" #include <cublas_v2.h>
#include <stdint.h> #include <stdint.h>
extern "C" {
#include "im2col.h" #include "im2col.h"
#include "cuda.h" #include "cuda.h"
}
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include <cuda.h> //#include <cuda.h>
#define FULL_MASK 0xffffffff
#define WARP_SIZE 32
template<typename T1, typename T2> template<typename T1, typename T2>
__device__ inline T1 __shfl_custom(T1 val, T2 lane) { __device__ inline T1 __shfl_custom(T1 val, T2 lane) {
@ -154,11 +150,6 @@ __global__ void im2col_align_gpu_kernel(const int n, const float* data_im,
{ {
//__shared__ float tmp_s[1]; //__shared__ float tmp_s[1];
//#define SHRED_VALS ((BLOCK / 169) * )
//__shared__ float dst_s[1024];
//__shared__ float dst_s[1024];
//__shared__ uint32_t bit_s[32];
//__shared__ uint8_t bit_s[128];
int index = blockIdx.x*blockDim.x + threadIdx.x; int index = blockIdx.x*blockDim.x + threadIdx.x;
for (; index < n; index += blockDim.x*gridDim.x) { for (; index < n; index += blockDim.x*gridDim.x) {
@ -604,8 +595,7 @@ __device__ void transpose32_optimized(uint32_t A[32]) {
} }
} }
#define BLOCK_TRANSPOSE32 256 extern "C" {
__device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n) __device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n)
{ {
//unsigned A_tmp[32]; //unsigned A_tmp[32];
@ -626,7 +616,7 @@ __device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B
#pragma unroll 32 #pragma unroll 32
for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i]; for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i];
} }
}
// transpose 32x32 bit // transpose 32x32 bit
__global__ void transpose_bin_gpu_kernel_32(uint32_t *A, uint32_t *B, const int n, const int m, __global__ void transpose_bin_gpu_kernel_32(uint32_t *A, uint32_t *B, const int n, const int m,

View File

@ -1,4 +1,3 @@
#include "darknet.h"
#include "image.h" #include "image.h"
#include "utils.h" #include "utils.h"
#include "blas.h" #include "blas.h"
@ -6,25 +5,31 @@
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#ifndef STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_IMPLEMENTATION #define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h" #include "stb_image.h"
#endif
#ifndef STB_IMAGE_WRITE_IMPLEMENTATION
#define STB_IMAGE_WRITE_IMPLEMENTATION #define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h" #include "stb_image_write.h"
#endif
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#include "opencv2/imgproc/imgproc_c.h" #include <opencv2/imgproc/imgproc_c.h>
#include "opencv2/core/types_c.h" #include <opencv2/core/types_c.h>
#include "opencv2/core/version.hpp" #include <opencv2/core/version.hpp>
#ifndef CV_VERSION_EPOCH #ifndef CV_VERSION_EPOCH
#include "opencv2/videoio/videoio_c.h" #include <opencv2/videoio/videoio_c.h>
#include "opencv2/imgcodecs/imgcodecs_c.h" #include <opencv2/imgcodecs/imgcodecs_c.h>
#include "http_stream.h" #include "http_stream.h"
#endif #endif
#include "http_stream.h" #include "http_stream.h"
#ifndef CV_RGB
#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 ) #define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
#endif #endif
#endif
extern int check_mistakes; extern int check_mistakes;
int windows = 0; int windows = 0;
@ -255,9 +260,9 @@ image **load_alphabet()
{ {
int i, j; int i, j;
const int nsize = 8; const int nsize = 8;
image **alphabets = calloc(nsize, sizeof(image)); image** alphabets = (image**)calloc(nsize, sizeof(image*));
for(j = 0; j < nsize; ++j){ for(j = 0; j < nsize; ++j){
alphabets[j] = calloc(128, sizeof(image)); alphabets[j] = (image*)calloc(128, sizeof(image));
for(i = 32; i < 127; ++i){ for(i = 32; i < 127; ++i){
char buff[256]; char buff[256];
sprintf(buff, "data/labels/%d_%d.png", i, j); sprintf(buff, "data/labels/%d_%d.png", i, j);
@ -273,7 +278,7 @@ image **load_alphabet()
detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names) detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names)
{ {
int selected_num = 0; int selected_num = 0;
detection_with_class* result_arr = calloc(dets_num, sizeof(detection_with_class)); detection_with_class* result_arr = (detection_with_class*)calloc(dets_num, sizeof(detection_with_class));
int i; int i;
for (i = 0; i < dets_num; ++i) { for (i = 0; i < dets_num; ++i) {
int best_class = -1; int best_class = -1;
@ -505,7 +510,7 @@ void save_cv_png(IplImage *img, const char *name)
IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3); IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
cvCvtColor(img, img_rgb, CV_RGB2BGR); cvCvtColor(img, img_rgb, CV_RGB2BGR);
stbi_write_png(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 0); stbi_write_png(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 0);
cvRelease(&img_rgb); cvRelease((void**)&img_rgb);
} }
void save_cv_jpg(IplImage *img, const char *name) void save_cv_jpg(IplImage *img, const char *name)
@ -513,7 +518,7 @@ void save_cv_jpg(IplImage *img, const char *name)
IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3); IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
cvCvtColor(img, img_rgb, CV_RGB2BGR); cvCvtColor(img, img_rgb, CV_RGB2BGR);
stbi_write_jpg(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 80); stbi_write_jpg(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 80);
cvRelease(&img_rgb); cvRelease((void**)&img_rgb);
} }
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output) void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output)
@ -952,8 +957,8 @@ void normalize_image(image p)
void normalize_image2(image p) void normalize_image2(image p)
{ {
float *min = calloc(p.c, sizeof(float)); float* min = (float*)calloc(p.c, sizeof(float));
float *max = calloc(p.c, sizeof(float)); float* max = (float*)calloc(p.c, sizeof(float));
int i,j; int i,j;
for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w];
@ -982,7 +987,7 @@ void normalize_image2(image p)
image copy_image(image p) image copy_image(image p)
{ {
image copy = p; image copy = p;
copy.data = calloc(p.h*p.w*p.c, sizeof(float)); copy.data = (float*)calloc(p.h * p.w * p.c, sizeof(float));
memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float));
return copy; return copy;
} }
@ -1252,7 +1257,7 @@ void save_image_png(image im, const char *name)
char buff[256]; char buff[256];
//sprintf(buff, "%s (%d)", name, windows); //sprintf(buff, "%s (%d)", name, windows);
sprintf(buff, "%s.png", name); sprintf(buff, "%s.png", name);
unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
int i,k; int i,k;
for(k = 0; k < im.c; ++k){ for(k = 0; k < im.c; ++k){
for(i = 0; i < im.w*im.h; ++i){ for(i = 0; i < im.w*im.h; ++i){
@ -1273,7 +1278,7 @@ void save_image_options(image im, const char *name, IMTYPE f, int quality)
else if (f == TGA) sprintf(buff, "%s.tga", name); else if (f == TGA) sprintf(buff, "%s.tga", name);
else if (f == JPG) sprintf(buff, "%s.jpg", name); else if (f == JPG) sprintf(buff, "%s.jpg", name);
else sprintf(buff, "%s.png", name); else sprintf(buff, "%s.png", name);
unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
int i, k; int i, k;
for (k = 0; k < im.c; ++k) { for (k = 0; k < im.c; ++k) {
for (i = 0; i < im.w*im.h; ++i) { for (i = 0; i < im.w*im.h; ++i) {
@ -1331,14 +1336,14 @@ image make_empty_image(int w, int h, int c)
image make_image(int w, int h, int c) image make_image(int w, int h, int c)
{ {
image out = make_empty_image(w,h,c); image out = make_empty_image(w,h,c);
out.data = calloc(h*w*c, sizeof(float)); out.data = (float*)calloc(h * w * c, sizeof(float));
return out; return out;
} }
image make_random_image(int w, int h, int c) image make_random_image(int w, int h, int c)
{ {
image out = make_empty_image(w,h,c); image out = make_empty_image(w,h,c);
out.data = calloc(h*w*c, sizeof(float)); out.data = (float*)calloc(h * w * c, sizeof(float));
int i; int i;
for(i = 0; i < w*h*c; ++i){ for(i = 0; i < w*h*c; ++i){
out.data[i] = (rand_normal() * .25) + .5; out.data[i] = (rand_normal() * .25) + .5;
@ -1585,7 +1590,7 @@ image random_augment_image(image im, float angle, float aspect, int low, int hig
int min = (im.h < im.w*aspect) ? im.h : im.w*aspect; int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
float scale = (float)r / min; float scale = (float)r / min;
float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; float rad = rand_uniform(-angle, angle) * 2.0 * M_PI / 360.;
float dx = (im.w*scale/aspect - size) / 2.; float dx = (im.w*scale/aspect - size) / 2.;
float dy = (im.h*scale - size) / 2.; float dy = (im.h*scale - size) / 2.;

View File

@ -7,8 +7,20 @@
#include <float.h> #include <float.h>
#include <string.h> #include <string.h>
#include <math.h> #include <math.h>
#ifdef OPENCV
#include <opencv2/highgui/highgui_c.h>
#include <opencv2/imgproc/imgproc_c.h>
#include <opencv2/core/types_c.h>
#include <opencv2/core/version.hpp>
#ifndef CV_VERSION_EPOCH
#include <opencv2/videoio/videoio_c.h>
#include <opencv2/imgcodecs/imgcodecs_c.h>
#endif
#endif
#include "box.h" #include "box.h"
#ifdef __cplusplus
extern "C" {
#endif
/* /*
typedef struct { typedef struct {
int w; int w;
@ -80,6 +92,11 @@ image load_image(char *filename, int w, int h, int c);
//LIB_API image load_image_color(char *filename, int w, int h); //LIB_API image load_image_color(char *filename, int w, int h);
image **load_alphabet(); image **load_alphabet();
#ifdef OPENCV
LIB_API image get_image_from_stream(CvCapture* cap);
LIB_API image get_image_from_stream_cpp(CvCapture* cap);
LIB_API image ipl_to_image(IplImage* src);
#endif
//float get_pixel(image m, int x, int y, int c); //float get_pixel(image m, int x, int y, int c);
//float get_pixel_extend(image m, int x, int y, int c); //float get_pixel_extend(image m, int x, int y, int c);
//void set_pixel(image m, int x, int y, int c, float val); //void set_pixel(image m, int x, int y, int c, float val);
@ -90,5 +107,8 @@ image get_image_layer(image m, int l);
//LIB_API void free_image(image m); //LIB_API void free_image(image m);
void test_resize(char *filename); void test_resize(char *filename);
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -98,7 +98,7 @@ void free_layer(layer l)
if (l.x_gpu) cuda_free(l.x_gpu); if (l.x_gpu) cuda_free(l.x_gpu);
if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); if (l.x_norm_gpu) cuda_free(l.x_norm_gpu);
if (l.align_bit_weights_gpu) cuda_free(l.align_bit_weights_gpu); if (l.align_bit_weights_gpu) cuda_free((float *)l.align_bit_weights_gpu);
if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu); if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu);
if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu); if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu);
if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu); if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu);

View File

@ -1,10 +1,12 @@
#ifndef BASE_LAYER_H #ifndef BASE_LAYER_H
#define BASE_LAYER_H #define BASE_LAYER_H
#include "darknet.h"
#include "activations.h" #include "activations.h"
#include "stddef.h" #include "stddef.h"
#include "tree.h" #include "tree.h"
#ifdef __cplusplus
extern "C" {
#endif
//struct network_state; //struct network_state;
@ -330,4 +332,7 @@ struct layer{
*/ */
//void free_layer(layer); //void free_layer(layer);
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -5,7 +5,7 @@
list *make_list() list *make_list()
{ {
list *l = malloc(sizeof(list)); list* l = (list*)malloc(sizeof(list));
l->size = 0; l->size = 0;
l->front = 0; l->front = 0;
l->back = 0; l->back = 0;
@ -40,18 +40,18 @@ void *list_pop(list *l){
void list_insert(list *l, void *val) void list_insert(list *l, void *val)
{ {
node *new = malloc(sizeof(node)); node* newnode = (node*)malloc(sizeof(node));
new->val = val; newnode->val = val;
new->next = 0; newnode->next = 0;
if(!l->back){ if(!l->back){
l->front = new; l->front = newnode;
new->prev = 0; newnode->prev = 0;
}else{ }else{
l->back->next = new; l->back->next = newnode;
new->prev = l->back; newnode->prev = l->back;
} }
l->back = new; l->back = newnode;
++l->size; ++l->size;
} }
@ -84,7 +84,7 @@ void free_list_contents_kvp(list *l)
{ {
node *n = l->front; node *n = l->front;
while (n) { while (n) {
kvp *p = n->val; kvp* p = (kvp*)n->val;
free(p->key); free(p->key);
free(n->val); free(n->val);
n = n->next; n = n->next;
@ -93,7 +93,7 @@ void free_list_contents_kvp(list *l)
void **list_to_array(list *l) void **list_to_array(list *l)
{ {
void **a = calloc(l->size, sizeof(void*)); void** a = (void**)calloc(l->size, sizeof(void*));
int count = 0; int count = 0;
node *n = l->front; node *n = l->front;
while(n){ while(n){

View File

@ -13,6 +13,9 @@ typedef struct list{
node *back; node *back;
} list; } list;
#ifdef __cplusplus
extern "C" {
#endif
list *make_list(); list *make_list();
int list_find(list *l, void *val); int list_find(list *l, void *val);
@ -24,4 +27,7 @@ void free_list(list *l);
void free_list_contents(list *l); void free_list_contents(list *l);
void free_list_contents_kvp(list *l); void free_list_contents_kvp(list *l);
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -26,7 +26,7 @@ int local_out_width(local_layer l)
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation) local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
{ {
int i; int i;
local_layer l = {0}; local_layer l = { (LAYER_TYPE)0 };
l.type = LOCAL; l.type = LOCAL;
l.h = h; l.h = h;
@ -47,19 +47,19 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
l.outputs = l.out_h * l.out_w * l.out_c; l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = l.w * l.h * l.c; l.inputs = l.w * l.h * l.c;
l.weights = calloc(c*n*size*size*locations, sizeof(float)); l.weights = (float*)calloc(c * n * size * size * locations, sizeof(float));
l.weight_updates = calloc(c*n*size*size*locations, sizeof(float)); l.weight_updates = (float*)calloc(c * n * size * size * locations, sizeof(float));
l.biases = calloc(l.outputs, sizeof(float)); l.biases = (float*)calloc(l.outputs, sizeof(float));
l.bias_updates = calloc(l.outputs, sizeof(float)); l.bias_updates = (float*)calloc(l.outputs, sizeof(float));
// float scale = 1./sqrt(size*size*c); // float scale = 1./sqrt(size*size*c);
float scale = sqrt(2./(size*size*c)); float scale = sqrt(2./(size*size*c));
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1); for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);
l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float)); l.col_image = (float*)calloc(out_h * out_w * size * size * c, sizeof(float));
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
l.forward = forward_local_layer; l.forward = forward_local_layer;
l.backward = backward_local_layer; l.backward = backward_local_layer;

View File

@ -9,6 +9,9 @@
typedef layer local_layer; typedef layer local_layer;
#ifdef __cplusplus
extern "C" {
#endif
#ifdef GPU #ifdef GPU
void forward_local_layer_gpu(local_layer layer, network_state state); void forward_local_layer_gpu(local_layer layer, network_state state);
void backward_local_layer_gpu(local_layer layer, network_state state); void backward_local_layer_gpu(local_layer layer, network_state state);
@ -27,5 +30,8 @@ void update_local_layer(local_layer layer, int batch, float learning_rate, float
void bias_output(float *output, float *biases, int batch, int n, int size); void bias_output(float *output, float *biases, int batch, int n, int size);
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -30,7 +30,7 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
{ {
fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs); fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs);
batch = batch / steps; batch = batch / steps;
layer l = { 0 }; layer l = { (LAYER_TYPE)0 };
l.batch = batch; l.batch = batch;
l.type = LSTM; l.type = LSTM;
l.steps = steps; l.steps = steps;
@ -39,49 +39,49 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
l.out_h = 1; l.out_h = 1;
l.out_c = outputs; l.out_c = outputs;
l.uf = malloc(sizeof(layer)); l.uf = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); *(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
l.uf->batch = batch; l.uf->batch = batch;
if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
l.ui = malloc(sizeof(layer)); l.ui = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); *(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
l.ui->batch = batch; l.ui->batch = batch;
if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
l.ug = malloc(sizeof(layer)); l.ug = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); *(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
l.ug->batch = batch; l.ug->batch = batch;
if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size; if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
l.uo = malloc(sizeof(layer)); l.uo = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); *(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
l.uo->batch = batch; l.uo->batch = batch;
if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size; if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
l.wf = malloc(sizeof(layer)); l.wf = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); *(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
l.wf->batch = batch; l.wf->batch = batch;
if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
l.wi = malloc(sizeof(layer)); l.wi = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); *(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
l.wi->batch = batch; l.wi->batch = batch;
if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
l.wg = malloc(sizeof(layer)); l.wg = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); *(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
l.wg->batch = batch; l.wg->batch = batch;
if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
l.wo = malloc(sizeof(layer)); l.wo = (layer*)malloc(sizeof(layer));
fprintf(stderr, "\t\t"); fprintf(stderr, "\t\t");
*(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); *(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
l.wo->batch = batch; l.wo->batch = batch;
@ -90,27 +90,27 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
l.batch_normalize = batch_normalize; l.batch_normalize = batch_normalize;
l.outputs = outputs; l.outputs = outputs;
l.output = calloc(outputs*batch*steps, sizeof(float)); l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
l.state = calloc(outputs*batch, sizeof(float)); l.state = (float*)calloc(outputs * batch, sizeof(float));
l.forward = forward_lstm_layer; l.forward = forward_lstm_layer;
l.update = update_lstm_layer; l.update = update_lstm_layer;
l.prev_state_cpu = calloc(batch*outputs, sizeof(float)); l.prev_state_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.prev_cell_cpu = calloc(batch*outputs, sizeof(float)); l.prev_cell_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.cell_cpu = calloc(batch*outputs*steps, sizeof(float)); l.cell_cpu = (float*)calloc(batch*outputs*steps, sizeof(float));
l.f_cpu = calloc(batch*outputs, sizeof(float)); l.f_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.i_cpu = calloc(batch*outputs, sizeof(float)); l.i_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.g_cpu = calloc(batch*outputs, sizeof(float)); l.g_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.o_cpu = calloc(batch*outputs, sizeof(float)); l.o_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.c_cpu = calloc(batch*outputs, sizeof(float)); l.c_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.h_cpu = calloc(batch*outputs, sizeof(float)); l.h_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.temp_cpu = calloc(batch*outputs, sizeof(float)); l.temp_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.temp2_cpu = calloc(batch*outputs, sizeof(float)); l.temp2_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.temp3_cpu = calloc(batch*outputs, sizeof(float)); l.temp3_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.dc_cpu = calloc(batch*outputs, sizeof(float)); l.dc_cpu = (float*)calloc(batch*outputs, sizeof(float));
l.dh_cpu = calloc(batch*outputs, sizeof(float)); l.dh_cpu = (float*)calloc(batch*outputs, sizeof(float));
#ifdef GPU #ifdef GPU
l.forward_gpu = forward_lstm_layer_gpu; l.forward_gpu = forward_lstm_layer_gpu;

View File

@ -6,7 +6,10 @@
#include "network.h" #include "network.h"
#define USET #define USET
layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize); #ifdef __cplusplus
extern "C" {
#endif
LIB_API layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
void forward_lstm_layer(layer l, network_state state); void forward_lstm_layer(layer l, network_state state);
void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay); void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay);
@ -15,6 +18,9 @@ void update_lstm_layer(layer l, int batch, float learning_rate, float momentum,
void forward_lstm_layer_gpu(layer l, network_state state); void forward_lstm_layer_gpu(layer l, network_state state);
void backward_lstm_layer_gpu(layer l, network_state state); void backward_lstm_layer_gpu(layer l, network_state state);
void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
#endif
#ifdef __cplusplus
}
#endif #endif
#endif #endif

View File

@ -15,7 +15,7 @@ void free_matrix(matrix m)
float matrix_topk_accuracy(matrix truth, matrix guess, int k) float matrix_topk_accuracy(matrix truth, matrix guess, int k)
{ {
int *indexes = calloc(k, sizeof(int)); int* indexes = (int*)calloc(k, sizeof(int));
int n = truth.cols; int n = truth.cols;
int i,j; int i,j;
int correct = 0; int correct = 0;
@ -48,15 +48,15 @@ matrix resize_matrix(matrix m, int size)
int i; int i;
if (m.rows == size) return m; if (m.rows == size) return m;
if (m.rows < size) { if (m.rows < size) {
m.vals = realloc(m.vals, size*sizeof(float*)); m.vals = (float**)realloc(m.vals, size * sizeof(float*));
for (i = m.rows; i < size; ++i) { for (i = m.rows; i < size; ++i) {
m.vals[i] = calloc(m.cols, sizeof(float)); m.vals[i] = (float*)calloc(m.cols, sizeof(float));
} }
} else if (m.rows > size) { } else if (m.rows > size) {
for (i = size; i < m.rows; ++i) { for (i = size; i < m.rows; ++i) {
free(m.vals[i]); free(m.vals[i]);
} }
m.vals = realloc(m.vals, size*sizeof(float*)); m.vals = (float**)realloc(m.vals, size * sizeof(float*));
} }
m.rows = size; m.rows = size;
return m; return m;
@ -79,9 +79,9 @@ matrix make_matrix(int rows, int cols)
matrix m; matrix m;
m.rows = rows; m.rows = rows;
m.cols = cols; m.cols = cols;
m.vals = calloc(m.rows, sizeof(float *)); m.vals = (float**)calloc(m.rows, sizeof(float*));
for(i = 0; i < m.rows; ++i){ for(i = 0; i < m.rows; ++i){
m.vals[i] = calloc(m.cols, sizeof(float)); m.vals[i] = (float*)calloc(m.cols, sizeof(float));
} }
return m; return m;
} }
@ -92,7 +92,7 @@ matrix hold_out_matrix(matrix *m, int n)
matrix h; matrix h;
h.rows = n; h.rows = n;
h.cols = m->cols; h.cols = m->cols;
h.vals = calloc(h.rows, sizeof(float *)); h.vals = (float**)calloc(h.rows, sizeof(float*));
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
int index = rand()%m->rows; int index = rand()%m->rows;
h.vals[i] = m->vals[index]; h.vals[i] = m->vals[index];
@ -103,7 +103,7 @@ matrix hold_out_matrix(matrix *m, int n)
float *pop_column(matrix *m, int c) float *pop_column(matrix *m, int c)
{ {
float *col = calloc(m->rows, sizeof(float)); float* col = (float*)calloc(m->rows, sizeof(float));
int i, j; int i, j;
for(i = 0; i < m->rows; ++i){ for(i = 0; i < m->rows; ++i){
col[i] = m->vals[i][c]; col[i] = m->vals[i][c];
@ -127,18 +127,18 @@ matrix csv_to_matrix(char *filename)
int n = 0; int n = 0;
int size = 1024; int size = 1024;
m.vals = calloc(size, sizeof(float*)); m.vals = (float**)calloc(size, sizeof(float*));
while((line = fgetl(fp))){ while((line = fgetl(fp))){
if(m.cols == -1) m.cols = count_fields(line); if(m.cols == -1) m.cols = count_fields(line);
if(n == size){ if(n == size){
size *= 2; size *= 2;
m.vals = realloc(m.vals, size*sizeof(float*)); m.vals = (float**)realloc(m.vals, size * sizeof(float*));
} }
m.vals[n] = parse_fields(line, m.cols); m.vals[n] = parse_fields(line, m.cols);
free(line); free(line);
++n; ++n;
} }
m.vals = realloc(m.vals, n*sizeof(float*)); m.vals = (float**)realloc(m.vals, n * sizeof(float*));
m.rows = n; m.rows = n;
return m; return m;
} }
@ -225,7 +225,7 @@ void kmeans_maximization(matrix data, int *assignments, matrix centers)
matrix old_centers = make_matrix(centers.rows, centers.cols); matrix old_centers = make_matrix(centers.rows, centers.cols);
int i, j; int i, j;
int *counts = calloc(centers.rows, sizeof(int)); int *counts = (int*)calloc(centers.rows, sizeof(int));
for (i = 0; i < centers.rows; ++i) { for (i = 0; i < centers.rows; ++i) {
for (j = 0; j < centers.cols; ++j) { for (j = 0; j < centers.cols; ++j) {
old_centers.vals[i][j] = centers.vals[i][j]; old_centers.vals[i][j] = centers.vals[i][j];
@ -268,7 +268,7 @@ void random_centers(matrix data, matrix centers) {
int *sample(int n) int *sample(int n)
{ {
int i; int i;
int *s = calloc(n, sizeof(int)); int* s = (int*)calloc(n, sizeof(int));
for (i = 0; i < n; ++i) s[i] = i; for (i = 0; i < n; ++i) s[i] = i;
for (i = n - 1; i >= 0; --i) { for (i = n - 1; i >= 0; --i) {
int swap = s[i]; int swap = s[i];
@ -301,7 +301,7 @@ void copy(float *x, float *y, int n)
model do_kmeans(matrix data, int k) model do_kmeans(matrix data, int k)
{ {
matrix centers = make_matrix(k, data.cols); matrix centers = make_matrix(k, data.cols);
int *assignments = calloc(data.rows, sizeof(int)); int* assignments = (int*)calloc(data.rows, sizeof(int));
//smart_centers(data, centers); //smart_centers(data, centers);
random_centers(data, centers); // IoU = 67.31% after kmeans random_centers(data, centers); // IoU = 67.31% after kmeans

View File

@ -12,6 +12,9 @@ typedef struct {
matrix centers; matrix centers;
} model; } model;
#ifdef __cplusplus
extern "C" {
#endif
model do_kmeans(matrix data, int k); model do_kmeans(matrix data, int k);
matrix make_matrix(int rows, int cols); matrix make_matrix(int rows, int cols);
@ -28,4 +31,7 @@ matrix resize_matrix(matrix m, int size);
float *pop_column(matrix *m, int c); float *pop_column(matrix *m, int c);
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -47,7 +47,7 @@ void cudnn_maxpool_setup(layer *l)
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
{ {
maxpool_layer l = {0}; maxpool_layer l = { (LAYER_TYPE)0 };
l.type = MAXPOOL; l.type = MAXPOOL;
l.batch = batch; l.batch = batch;
l.h = h; l.h = h;
@ -62,9 +62,9 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
l.size = size; l.size = size;
l.stride = stride; l.stride = stride;
int output_size = l.out_h * l.out_w * l.out_c * batch; int output_size = l.out_h * l.out_w * l.out_c * batch;
l.indexes = calloc(output_size, sizeof(int)); l.indexes = (int*)calloc(output_size, sizeof(int));
l.output = calloc(output_size, sizeof(float)); l.output = (float*)calloc(output_size, sizeof(float));
l.delta = calloc(output_size, sizeof(float)); l.delta = (float*)calloc(output_size, sizeof(float));
l.forward = forward_maxpool_layer; l.forward = forward_maxpool_layer;
l.backward = backward_maxpool_layer; l.backward = backward_maxpool_layer;
#ifdef GPU #ifdef GPU
@ -93,9 +93,9 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
l->outputs = l->out_w * l->out_h * l->c; l->outputs = l->out_w * l->out_h * l->c;
int output_size = l->outputs * l->batch; int output_size = l->outputs * l->batch;
l->indexes = realloc(l->indexes, output_size * sizeof(int)); l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int));
l->output = realloc(l->output, output_size * sizeof(float)); l->output = (float*)realloc(l->output, output_size * sizeof(float));
l->delta = realloc(l->delta, output_size * sizeof(float)); l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
#ifdef GPU #ifdef GPU
CHECK_CUDA(cudaFree((float *)l->indexes_gpu)); CHECK_CUDA(cudaFree((float *)l->indexes_gpu));

View File

@ -8,6 +8,9 @@
typedef layer maxpool_layer; typedef layer maxpool_layer;
#ifdef __cplusplus
extern "C" {
#endif
image get_maxpool_image(maxpool_layer l); image get_maxpool_image(maxpool_layer l);
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
void resize_maxpool_layer(maxpool_layer *l, int w, int h); void resize_maxpool_layer(maxpool_layer *l, int w, int h);
@ -20,5 +23,8 @@ void backward_maxpool_layer_gpu(maxpool_layer l, network_state state);
void cudnn_maxpool_setup(maxpool_layer *l); void cudnn_maxpool_setup(maxpool_layer *l);
#endif // GPU #endif // GPU
#ifdef __cplusplus
}
#endif #endif
#endif

View File

@ -2,10 +2,8 @@
#include "curand.h" #include "curand.h"
#include "cublas_v2.h" #include "cublas_v2.h"
extern "C" {
#include "maxpool_layer.h" #include "maxpool_layer.h"
#include "cuda.h" #include "cuda.h"
}
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
{ {

View File

@ -180,16 +180,16 @@ network make_network(int n)
{ {
network net = {0}; network net = {0};
net.n = n; net.n = n;
net.layers = calloc(net.n, sizeof(layer)); net.layers = (layer*)calloc(net.n, sizeof(layer));
net.seen = calloc(1, sizeof(uint64_t)); net.seen = (uint64_t*)calloc(1, sizeof(uint64_t));
#ifdef GPU #ifdef GPU
net.input_gpu = calloc(1, sizeof(float *)); net.input_gpu = (float**)calloc(1, sizeof(float*));
net.truth_gpu = calloc(1, sizeof(float *)); net.truth_gpu = (float**)calloc(1, sizeof(float*));
net.input16_gpu = calloc(1, sizeof(float *)); net.input16_gpu = (float**)calloc(1, sizeof(float*));
net.output16_gpu = calloc(1, sizeof(float *)); net.output16_gpu = (float**)calloc(1, sizeof(float*));
net.max_input16_size = calloc(1, sizeof(size_t)); net.max_input16_size = (size_t*)calloc(1, sizeof(size_t));
net.max_output16_size = calloc(1, sizeof(size_t)); net.max_output16_size = (size_t*)calloc(1, sizeof(size_t));
#endif #endif
return net; return net;
} }
@ -300,8 +300,8 @@ float train_network_datum(network net, float *x, float *y)
float train_network_sgd(network net, data d, int n) float train_network_sgd(network net, data d, int n)
{ {
int batch = net.batch; int batch = net.batch;
float *X = calloc(batch*d.X.cols, sizeof(float)); float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
float *y = calloc(batch*d.y.cols, sizeof(float)); float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
int i; int i;
float sum = 0; float sum = 0;
@ -320,8 +320,8 @@ float train_network(network net, data d)
assert(d.X.rows % net.batch == 0); assert(d.X.rows % net.batch == 0);
int batch = net.batch; int batch = net.batch;
int n = d.X.rows / batch; int n = d.X.rows / batch;
float *X = calloc(batch*d.X.cols, sizeof(float)); float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
float *y = calloc(batch*d.y.cols, sizeof(float)); float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
int i; int i;
float sum = 0; float sum = 0;
@ -389,11 +389,11 @@ int recalculate_workspace_size(network *net)
} }
else { else {
free(net->workspace); free(net->workspace);
net->workspace = calloc(1, workspace_size); net->workspace = (float*)calloc(1, workspace_size);
} }
#else #else
free(net->workspace); free(net->workspace);
net->workspace = calloc(1, workspace_size); net->workspace = (float*)calloc(1, workspace_size);
#endif #endif
//fprintf(stderr, " Done!\n"); //fprintf(stderr, " Done!\n");
return 0; return 0;
@ -495,19 +495,19 @@ int resize_network(network *net, int w, int h)
net->input_pinned_cpu_flag = 1; net->input_pinned_cpu_flag = 1;
else { else {
cudaGetLastError(); // reset CUDA-error cudaGetLastError(); // reset CUDA-error
net->input_pinned_cpu = calloc(size, sizeof(float)); net->input_pinned_cpu = (float*)calloc(size, sizeof(float));
net->input_pinned_cpu_flag = 0; net->input_pinned_cpu_flag = 0;
} }
printf(" CUDA allocate done! \n"); printf(" CUDA allocate done! \n");
}else { }else {
free(net->workspace); free(net->workspace);
net->workspace = calloc(1, workspace_size); net->workspace = (float*)calloc(1, workspace_size);
if(!net->input_pinned_cpu_flag) if(!net->input_pinned_cpu_flag)
net->input_pinned_cpu = realloc(net->input_pinned_cpu, size * sizeof(float)); net->input_pinned_cpu = (float*)realloc(net->input_pinned_cpu, size * sizeof(float));
} }
#else #else
free(net->workspace); free(net->workspace);
net->workspace = calloc(1, workspace_size); net->workspace = (float*)calloc(1, workspace_size);
#endif #endif
//fprintf(stderr, " Done!\n"); //fprintf(stderr, " Done!\n");
return 0; return 0;
@ -534,7 +534,7 @@ detection_layer get_network_detection_layer(network net)
} }
} }
fprintf(stderr, "Detection layer not found!!\n"); fprintf(stderr, "Detection layer not found!!\n");
detection_layer l = {0}; detection_layer l = { (LAYER_TYPE)0 };
return l; return l;
} }
@ -632,11 +632,11 @@ detection *make_network_boxes(network *net, float thresh, int *num)
int i; int i;
int nboxes = num_detections(net, thresh); int nboxes = num_detections(net, thresh);
if (num) *num = nboxes; if (num) *num = nboxes;
detection *dets = calloc(nboxes, sizeof(detection)); detection* dets = (detection*)calloc(nboxes, sizeof(detection));
for (i = 0; i < nboxes; ++i) { for (i = 0; i < nboxes; ++i) {
dets[i].prob = calloc(l.classes, sizeof(float)); dets[i].prob = (float*)calloc(l.classes, sizeof(float));
if (l.coords > 4) { if (l.coords > 4) {
dets[i].mask = calloc(l.coords - 4, sizeof(float)); dets[i].mask = (float*)calloc(l.coords - 4, sizeof(float));
} }
} }
return dets; return dets;
@ -645,10 +645,10 @@ detection *make_network_boxes(network *net, float thresh, int *num)
void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter) void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter)
{ {
box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); box* boxes = (box*)calloc(l.w * l.h * l.n, sizeof(box));
float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); float** probs = (float**)calloc(l.w * l.h * l.n, sizeof(float*));
int i, j; int i, j;
for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float)); for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map); get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map);
for (j = 0; j < l.w*l.h*l.n; ++j) { for (j = 0; j < l.w*l.h*l.n; ++j) {
dets[j].classes = l.classes; dets[j].classes = l.classes;
@ -789,7 +789,7 @@ matrix network_predict_data_multi(network net, data test, int n)
int i,j,b,m; int i,j,b,m;
int k = get_network_output_size(net); int k = get_network_output_size(net);
matrix pred = make_matrix(test.X.rows, k); matrix pred = make_matrix(test.X.rows, k);
float *X = calloc(net.batch*test.X.rows, sizeof(float)); float* X = (float*)calloc(net.batch * test.X.rows, sizeof(float));
for(i = 0; i < test.X.rows; i += net.batch){ for(i = 0; i < test.X.rows; i += net.batch){
for(b = 0; b < net.batch; ++b){ for(b = 0; b < net.batch; ++b){
if(i+b == test.X.rows) break; if(i+b == test.X.rows) break;
@ -814,7 +814,7 @@ matrix network_predict_data(network net, data test)
int i,j,b; int i,j,b;
int k = get_network_output_size(net); int k = get_network_output_size(net);
matrix pred = make_matrix(test.X.rows, k); matrix pred = make_matrix(test.X.rows, k);
float *X = calloc(net.batch*test.X.cols, sizeof(float)); float* X = (float*)calloc(net.batch * test.X.cols, sizeof(float));
for(i = 0; i < test.X.rows; i += net.batch){ for(i = 0; i < test.X.rows; i += net.batch){
for(b = 0; b < net.batch; ++b){ for(b = 0; b < net.batch; ++b){
if(i+b == test.X.rows) break; if(i+b == test.X.rows) break;

View File

@ -6,14 +6,14 @@
#include <stdint.h> #include <stdint.h>
#include "layer.h" #include "layer.h"
#ifdef __cplusplus
extern "C" {
#endif
#include "image.h" #include "image.h"
#include "data.h" #include "data.h"
#include "tree.h" #include "tree.h"
#ifdef __cplusplus
extern "C" {
#endif
/* /*
typedef enum { typedef enum {
CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM

View File

@ -3,7 +3,6 @@
//#include "cublas_v2.h" //#include "cublas_v2.h"
#include "cuda.h" #include "cuda.h"
extern "C" {
#include <stdio.h> #include <stdio.h>
#include <time.h> #include <time.h>
#include <assert.h> #include <assert.h>
@ -35,10 +34,9 @@ extern "C" {
#include "route_layer.h" #include "route_layer.h"
#include "shortcut_layer.h" #include "shortcut_layer.h"
#include "blas.h" #include "blas.h"
}
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#endif #endif
#include "http_stream.h" #include "http_stream.h"
@ -396,9 +394,11 @@ void sync_nets(network *nets, int n, int interval)
float train_networks(network *nets, int n, data d, int interval) float train_networks(network *nets, int n, data d, int interval)
{ {
int i; int i;
#ifdef _DEBUG
int batch = nets[0].batch; int batch = nets[0].batch;
int subdivisions = nets[0].subdivisions; int subdivisions = nets[0].subdivisions;
assert(batch * subdivisions * n == d.X.rows); assert(batch * subdivisions * n == d.X.rows);
#endif
pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t)); pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
float *errors = (float *) calloc(n, sizeof(float)); float *errors = (float *) calloc(n, sizeof(float));

View File

@ -5,7 +5,7 @@
#include "utils.h" #include "utils.h"
#ifdef OPENCV #ifdef OPENCV
#include "opencv2/highgui/highgui_c.h" #include <opencv2/highgui/highgui_c.h>
#endif #endif
// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2 // ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2

View File

@ -5,7 +5,7 @@
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
{ {
fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
layer layer = {0}; layer layer = { (LAYER_TYPE)0 };
layer.type = NORMALIZATION; layer.type = NORMALIZATION;
layer.batch = batch; layer.batch = batch;
layer.h = layer.out_h = h; layer.h = layer.out_h = h;
@ -15,10 +15,10 @@ layer make_normalization_layer(int batch, int w, int h, int c, int size, float a
layer.size = size; layer.size = size;
layer.alpha = alpha; layer.alpha = alpha;
layer.beta = beta; layer.beta = beta;
layer.output = calloc(h * w * c * batch, sizeof(float)); layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
layer.delta = calloc(h * w * c * batch, sizeof(float)); layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
layer.squared = calloc(h * w * c * batch, sizeof(float)); layer.squared = (float*)calloc(h * w * c * batch, sizeof(float));
layer.norms = calloc(h * w * c * batch, sizeof(float)); layer.norms = (float*)calloc(h * w * c * batch, sizeof(float));
layer.inputs = w*h*c; layer.inputs = w*h*c;
layer.outputs = layer.inputs; layer.outputs = layer.inputs;
@ -46,10 +46,10 @@ void resize_normalization_layer(layer *layer, int w, int h)
layer->out_w = w; layer->out_w = w;
layer->inputs = w*h*c; layer->inputs = w*h*c;
layer->outputs = layer->inputs; layer->outputs = layer->inputs;
layer->output = realloc(layer->output, h * w * c * batch * sizeof(float)); layer->output = (float*)realloc(layer->output, h * w * c * batch * sizeof(float));
layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); layer->delta = (float*)realloc(layer->delta, h * w * c * batch * sizeof(float));
layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); layer->squared = (float*)realloc(layer->squared, h * w * c * batch * sizeof(float));
layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); layer->norms = (float*)realloc(layer->norms, h * w * c * batch * sizeof(float));
#ifdef GPU #ifdef GPU
cuda_free(layer->output_gpu); cuda_free(layer->output_gpu);
cuda_free(layer->delta_gpu); cuda_free(layer->delta_gpu);

View File

@ -5,6 +5,9 @@
#include "layer.h" #include "layer.h"
#include "network.h" #include "network.h"
#ifdef __cplusplus
extern "C" {
#endif
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
void resize_normalization_layer(layer *layer, int h, int w); void resize_normalization_layer(layer *layer, int h, int w);
void forward_normalization_layer(const layer layer, network_state state); void forward_normalization_layer(const layer layer, network_state state);
@ -16,4 +19,7 @@ void forward_normalization_layer_gpu(const layer layer, network_state state);
void backward_normalization_layer_gpu(const layer layer, network_state state); void backward_normalization_layer_gpu(const layer layer, network_state state);
#endif #endif
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -3,6 +3,7 @@
#include <string.h> #include <string.h>
#include "option_list.h" #include "option_list.h"
#include "utils.h" #include "utils.h"
#include "data.h"
list *read_data_cfg(char *filename) list *read_data_cfg(char *filename)
{ {
@ -71,7 +72,7 @@ int read_option(char *s, list *options)
void option_insert(list *l, char *key, char *val) void option_insert(list *l, char *key, char *val)
{ {
kvp *p = malloc(sizeof(kvp)); kvp* p = (kvp*)malloc(sizeof(kvp));
p->key = key; p->key = key;
p->val = val; p->val = val;
p->used = 0; p->used = 0;

View File

@ -9,6 +9,9 @@ typedef struct{
int used; int used;
} kvp; } kvp;
#ifdef __cplusplus
extern "C" {
#endif
list *read_data_cfg(char *filename); list *read_data_cfg(char *filename);
int read_option(char *s, list *options); int read_option(char *s, list *options);
@ -28,4 +31,7 @@ void option_unused(list *l);
//LIB_API metadata get_metadata(char *file); //LIB_API metadata get_metadata(char *file);
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -272,7 +272,7 @@ int *parse_yolo_mask(char *a, int *num)
for (i = 0; i < len; ++i) { for (i = 0; i < len; ++i) {
if (a[i] == ',') ++n; if (a[i] == ',') ++n;
} }
mask = calloc(n, sizeof(int)); mask = (int*)calloc(n, sizeof(int));
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
int val = atoi(a); int val = atoi(a);
mask[i] = val; mask[i] = val;
@ -587,8 +587,8 @@ route_layer parse_route(list *options, size_params params, network net)
if (l[i] == ',') ++n; if (l[i] == ',') ++n;
} }
int *layers = calloc(n, sizeof(int)); int* layers = (int*)calloc(n, sizeof(int));
int *sizes = calloc(n, sizeof(int)); int* sizes = (int*)calloc(n, sizeof(int));
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
int index = atoi(l); int index = atoi(l);
l = strchr(l, ',')+1; l = strchr(l, ',')+1;
@ -693,8 +693,8 @@ void parse_net_options(list *options, network *net)
for(i = 0; i < len; ++i){ for(i = 0; i < len; ++i){
if (l[i] == ',') ++n; if (l[i] == ',') ++n;
} }
int *steps = calloc(n, sizeof(int)); int* steps = (int*)calloc(n, sizeof(int));
float *scales = calloc(n, sizeof(float)); float* scales = (float*)calloc(n, sizeof(float));
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
int step = atoi(l); int step = atoi(l);
float scale = atof(p); float scale = atof(p);
@ -765,7 +765,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
fprintf(stderr, "%4d ", count); fprintf(stderr, "%4d ", count);
s = (section *)n->val; s = (section *)n->val;
options = s->options; options = s->options;
layer l = {0}; layer l = { (LAYER_TYPE)0 };
LAYER_TYPE lt = string_to_layer_type(s->type); LAYER_TYPE lt = string_to_layer_type(s->type);
if(lt == CONVOLUTIONAL){ if(lt == CONVOLUTIONAL){
l = parse_convolutional(options, params); l = parse_convolutional(options, params);
@ -864,7 +864,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1; if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
else { else {
cudaGetLastError(); // reset CUDA-error cudaGetLastError(); // reset CUDA-error
net.input_pinned_cpu = calloc(size, sizeof(float)); net.input_pinned_cpu = (float*)calloc(size, sizeof(float));
} }
// pre-allocate memory for inference on Tensor Cores (fp16) // pre-allocate memory for inference on Tensor Cores (fp16)
@ -879,12 +879,12 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1); net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
} }
else { else {
net.workspace = calloc(1, workspace_size); net.workspace = (float*)calloc(1, workspace_size);
} }
} }
#else #else
if (workspace_size) { if (workspace_size) {
net.workspace = calloc(1, workspace_size); net.workspace = (float*)calloc(1, workspace_size);
} }
#endif #endif
@ -911,7 +911,7 @@ list *read_cfg(char *filename)
strip(line); strip(line);
switch(line[0]){ switch(line[0]){
case '[': case '[':
current = malloc(sizeof(section)); current = (section*)malloc(sizeof(section));
list_insert(sections, current); list_insert(sections, current);
current->options = make_list(); current->options = make_list();
current->type = line; current->type = line;
@ -1091,7 +1091,7 @@ void save_weights(network net, char *filename)
void transpose_matrix(float *a, int rows, int cols) void transpose_matrix(float *a, int rows, int cols)
{ {
float *transpose = calloc(rows*cols, sizeof(float)); float* transpose = (float*)calloc(rows * cols, sizeof(float));
int x, y; int x, y;
for(x = 0; x < rows; ++x){ for(x = 0; x < rows; ++x){
for(y = 0; y < cols; ++y){ for(y = 0; y < cols; ++y){
@ -1313,7 +1313,7 @@ void load_weights(network *net, char *filename)
network *load_network_custom(char *cfg, char *weights, int clear, int batch) network *load_network_custom(char *cfg, char *weights, int clear, int batch)
{ {
printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear); printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
network *net = calloc(1, sizeof(network)); network* net = (network*)calloc(1, sizeof(network));
*net = parse_network_cfg_custom(cfg, batch, 0); *net = parse_network_cfg_custom(cfg, batch, 0);
if (weights && weights[0] != 0) { if (weights && weights[0] != 0) {
load_weights(net, weights); load_weights(net, weights);
@ -1326,7 +1326,7 @@ network *load_network_custom(char *cfg, char *weights, int clear, int batch)
network *load_network(char *cfg, char *weights, int clear) network *load_network(char *cfg, char *weights, int clear)
{ {
printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear); printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
network *net = calloc(1, sizeof(network)); network* net = (network*)calloc(1, sizeof(network));
*net = parse_network_cfg(cfg); *net = parse_network_cfg(cfg);
if (weights && weights[0] != 0) { if (weights && weights[0] != 0) {
load_weights(net, weights); load_weights(net, weights);

View File

@ -2,6 +2,9 @@
#define PARSER_H #define PARSER_H
#include "network.h" #include "network.h"
#ifdef __cplusplus
extern "C" {
#endif
network parse_network_cfg(char *filename); network parse_network_cfg(char *filename);
network parse_network_cfg_custom(char *filename, int batch, int time_steps); network parse_network_cfg_custom(char *filename, int batch, int time_steps);
void save_network(network net, char *filename); void save_network(network net, char *filename);
@ -11,4 +14,7 @@ void save_weights_double(network net, char *filename);
void load_weights(network *net, char *filename); void load_weights(network *net, char *filename);
void load_weights_upto(network *net, char *filename, int cutoff); void load_weights_upto(network *net, char *filename, int cutoff);
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -9,11 +9,10 @@
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#define DOABS 1
region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes) region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes)
{ {
region_layer l = {0}; region_layer l = { (LAYER_TYPE)0 };
l.type = REGION; l.type = REGION;
l.n = n; l.n = n;
@ -22,15 +21,15 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int
l.w = w; l.w = w;
l.classes = classes; l.classes = classes;
l.coords = coords; l.coords = coords;
l.cost = calloc(1, sizeof(float)); l.cost = (float*)calloc(1, sizeof(float));
l.biases = calloc(n*2, sizeof(float)); l.biases = (float*)calloc(n * 2, sizeof(float));
l.bias_updates = calloc(n*2, sizeof(float)); l.bias_updates = (float*)calloc(n * 2, sizeof(float));
l.outputs = h*w*n*(classes + coords + 1); l.outputs = h*w*n*(classes + coords + 1);
l.inputs = l.outputs; l.inputs = l.outputs;
l.max_boxes = max_boxes; l.max_boxes = max_boxes;
l.truths = max_boxes*(5); l.truths = max_boxes*(5);
l.delta = calloc(batch*l.outputs, sizeof(float)); l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
l.output = calloc(batch*l.outputs, sizeof(float)); l.output = (float*)calloc(batch * l.outputs, sizeof(float));
int i; int i;
for(i = 0; i < n*2; ++i){ for(i = 0; i < n*2; ++i){
l.biases[i] = .5; l.biases[i] = .5;
@ -61,8 +60,8 @@ void resize_region_layer(layer *l, int w, int h)
l->outputs = h*w*l->n*(l->classes + l->coords + 1); l->outputs = h*w*l->n*(l->classes + l->coords + 1);
l->inputs = l->outputs; l->inputs = l->outputs;
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
#ifdef GPU #ifdef GPU
if (old_w < w || old_h < h) { if (old_w < w || old_h < h) {
@ -444,11 +443,11 @@ void forward_region_layer_gpu(const region_layer l, network_state state)
softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5); softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5);
} }
float *in_cpu = calloc(l.batch*l.inputs, sizeof(float)); float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
float *truth_cpu = 0; float *truth_cpu = 0;
if(state.truth){ if(state.truth){
int num_truth = l.batch*l.truths; int num_truth = l.batch*l.truths;
truth_cpu = calloc(num_truth, sizeof(float)); truth_cpu = (float*)calloc(num_truth, sizeof(float));
cuda_pull_array(state.truth, truth_cpu, num_truth); cuda_pull_array(state.truth, truth_cpu, num_truth);
} }
cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs); cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs);

View File

@ -6,6 +6,9 @@
typedef layer region_layer; typedef layer region_layer;
#ifdef __cplusplus
extern "C" {
#endif
region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords, int max_boxes); region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords, int max_boxes);
void forward_region_layer(const region_layer l, network_state state); void forward_region_layer(const region_layer l, network_state state);
void backward_region_layer(const region_layer l, network_state state); void backward_region_layer(const region_layer l, network_state state);
@ -20,4 +23,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state);
void backward_region_layer_gpu(region_layer l, network_state state); void backward_region_layer_gpu(region_layer l, network_state state);
#endif #endif
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@ -6,7 +6,7 @@
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse) layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
{ {
layer l = {0}; layer l = { (LAYER_TYPE)0 };
l.type = REORG; l.type = REORG;
l.batch = batch; l.batch = batch;
l.stride = stride; l.stride = stride;
@ -27,8 +27,8 @@ layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
l.outputs = l.out_h * l.out_w * l.out_c; l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = h*w*c; l.inputs = h*w*c;
int output_size = l.out_h * l.out_w * l.out_c * batch; int output_size = l.out_h * l.out_w * l.out_c * batch;
l.output = calloc(output_size, sizeof(float)); l.output = (float*)calloc(output_size, sizeof(float));
l.delta = calloc(output_size, sizeof(float)); l.delta = (float*)calloc(output_size, sizeof(float));
l.forward = forward_reorg_layer; l.forward = forward_reorg_layer;
l.backward = backward_reorg_layer; l.backward = backward_reorg_layer;
@ -64,8 +64,8 @@ void resize_reorg_layer(layer *l, int w, int h)
l->inputs = l->outputs; l->inputs = l->outputs;
int output_size = l->outputs * l->batch; int output_size = l->outputs * l->batch;
l->output = realloc(l->output, output_size * sizeof(float)); l->output = (float*)realloc(l->output, output_size * sizeof(float));
l->delta = realloc(l->delta, output_size * sizeof(float)); l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
#ifdef GPU #ifdef GPU
cuda_free(l->output_gpu); cuda_free(l->output_gpu);

View File

@ -6,6 +6,9 @@
#include "layer.h" #include "layer.h"
#include "network.h" #include "network.h"
#ifdef __cplusplus
extern "C" {
#endif
layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse); layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse);
void resize_reorg_layer(layer *l, int w, int h); void resize_reorg_layer(layer *l, int w, int h);
void forward_reorg_layer(const layer l, network_state state); void forward_reorg_layer(const layer l, network_state state);
@ -16,5 +19,8 @@ void forward_reorg_layer_gpu(layer l, network_state state);
void backward_reorg_layer_gpu(layer l, network_state state); void backward_reorg_layer_gpu(layer l, network_state state);
#endif #endif
#ifdef __cplusplus
}
#endif #endif
#endif

Some files were not shown because too many files have changed in this diff Show More