mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
improve compatibility with c++ compilers, prepare for CMake
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@ -28,3 +28,8 @@ Thumbs.db
|
|||||||
# CMake #
|
# CMake #
|
||||||
cmake-build-debug/
|
cmake-build-debug/
|
||||||
CMakeLists.txt
|
CMakeLists.txt
|
||||||
|
build_*/
|
||||||
|
build.*
|
||||||
|
cmake/
|
||||||
|
*.patch
|
||||||
|
.gitignore
|
||||||
|
@ -26,6 +26,38 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#define PORT unsigned long
|
||||||
|
#define ADDRPOINTER int*
|
||||||
|
#else
|
||||||
|
#define PORT unsigned short
|
||||||
|
#define SOCKET int
|
||||||
|
#define HOSTENT struct hostent
|
||||||
|
#define SOCKADDR struct sockaddr
|
||||||
|
#define SOCKADDR_IN struct sockaddr_in
|
||||||
|
#define ADDRPOINTER unsigned int*
|
||||||
|
#define INVALID_SOCKET -1
|
||||||
|
#define SOCKET_ERROR -1
|
||||||
|
#endif
|
||||||
|
#define FULL_MASK 0xffffffff
|
||||||
|
#define WARP_SIZE 32
|
||||||
|
#define BLOCK 512
|
||||||
|
#define NUMCHARS 37
|
||||||
|
#define NFRAMES 3
|
||||||
|
#define BLOCK_TRANSPOSE32 256
|
||||||
|
#define DOABS 1
|
||||||
|
#define SECRET_NUM -1234
|
||||||
|
#define C_SHARP_MAX_OBJECTS 1000
|
||||||
|
#define TILE_M 4 // 4 ops
|
||||||
|
#define TILE_N 16 // AVX2 = 2 ops * 8 floats
|
||||||
|
#define TILE_K 16 // loop
|
||||||
|
#ifndef __COMPAR_FN_T
|
||||||
|
#define __COMPAR_FN_T
|
||||||
|
typedef int (*__compar_fn_t)(const void*, const void*);
|
||||||
|
#ifdef __USE_GNU
|
||||||
|
typedef __compar_fn_t comparison_fn_t;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
#define BLOCK 512
|
#define BLOCK 512
|
||||||
|
|
||||||
|
@ -1,17 +1,7 @@
|
|||||||
#pragma once
|
#ifndef YOLO_V2_CLASS_HPP
|
||||||
#ifdef LIB_EXPORTS
|
#define YOLO_V2_CLASS_HPP
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#define LIB_API __declspec(dllexport)
|
#include "darknet.h"
|
||||||
#else
|
|
||||||
#define LIB_API __attribute__((visibility("default")))
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#define LIB_API
|
|
||||||
#else
|
|
||||||
#define LIB_API
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct bbox_t {
|
struct bbox_t {
|
||||||
unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box
|
unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box
|
||||||
@ -28,7 +18,6 @@ struct image_t {
|
|||||||
float *data; // pointer to the image data
|
float *data; // pointer to the image data
|
||||||
};
|
};
|
||||||
|
|
||||||
#define C_SHARP_MAX_OBJECTS 1000
|
|
||||||
struct bbox_t_container {
|
struct bbox_t_container {
|
||||||
bbox_t candidates[C_SHARP_MAX_OBJECTS];
|
bbox_t candidates[C_SHARP_MAX_OBJECTS];
|
||||||
};
|
};
|
||||||
@ -41,8 +30,8 @@ struct bbox_t_container {
|
|||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include <opencv2/opencv.hpp> // C++
|
#include <opencv2/opencv.hpp> // C++
|
||||||
#include "opencv2/highgui/highgui_c.h" // C
|
#include <opencv2/highgui/highgui_c.h> // C
|
||||||
#include "opencv2/imgproc/imgproc_c.h" // C
|
#include <opencv2/imgproc/imgproc_c.h> // C
|
||||||
#endif // OPENCV
|
#endif // OPENCV
|
||||||
|
|
||||||
extern "C" LIB_API int init(const char *configurationFilename, const char *weightsFilename, int gpu);
|
extern "C" LIB_API int init(const char *configurationFilename, const char *weightsFilename, int gpu);
|
||||||
@ -658,3 +647,4 @@ void free_img(image_t m) {
|
|||||||
|
|
||||||
#endif // __cplusplus
|
#endif // __cplusplus
|
||||||
*/
|
*/
|
||||||
|
#endif
|
||||||
|
@ -3,10 +3,8 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "activations.h"
|
#include "activations.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
__device__ float lhtan_activate_kernel(float x)
|
__device__ float lhtan_activate_kernel(float x)
|
||||||
|
@ -11,15 +11,15 @@
|
|||||||
|
|
||||||
layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
|
layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
|
||||||
{
|
{
|
||||||
layer l = {0};
|
layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = ACTIVE;
|
l.type = ACTIVE;
|
||||||
|
|
||||||
l.inputs = inputs;
|
l.inputs = inputs;
|
||||||
l.outputs = inputs;
|
l.outputs = inputs;
|
||||||
l.batch=batch;
|
l.batch=batch;
|
||||||
|
|
||||||
l.output = calloc(batch*inputs, sizeof(float*));
|
l.output = (float*)calloc(batch * inputs, sizeof(float));
|
||||||
l.delta = calloc(batch*inputs, sizeof(float*));
|
l.delta = (float*)calloc(batch * inputs, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_activation_layer;
|
l.forward = forward_activation_layer;
|
||||||
l.backward = backward_activation_layer;
|
l.backward = backward_activation_layer;
|
||||||
|
@ -5,6 +5,9 @@
|
|||||||
#include "layer.h"
|
#include "layer.h"
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
|
layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
|
||||||
|
|
||||||
void forward_activation_layer(layer l, network_state state);
|
void forward_activation_layer(layer l, network_state state);
|
||||||
@ -15,5 +18,8 @@ void forward_activation_layer_gpu(layer l, network_state state);
|
|||||||
void backward_activation_layer_gpu(layer l, network_state state);
|
void backward_activation_layer_gpu(layer l, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -8,6 +8,9 @@
|
|||||||
// LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
|
// LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
|
||||||
//}ACTIVATION;
|
//}ACTIVATION;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
ACTIVATION get_activation(char *s);
|
ACTIVATION get_activation(char *s);
|
||||||
|
|
||||||
char *get_activation_string(ACTIVATION a);
|
char *get_activation_string(ACTIVATION a);
|
||||||
@ -87,5 +90,8 @@ static inline float leaky_gradient(float x){return (x>0) ? 1 : .1f;}
|
|||||||
static inline float tanh_gradient(float x){return 1-x*x;}
|
static inline float tanh_gradient(float x){return 1-x*x;}
|
||||||
static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01f : .125f;}
|
static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01f : .125f;}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
#include "classifier.h"
|
#include "classifier.h"
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <winsock.h>
|
|
||||||
#include "gettimeofday.h"
|
#include "gettimeofday.h"
|
||||||
#else
|
#else
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
@ -14,10 +13,10 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#include "opencv2/core/version.hpp"
|
#include <opencv2/core/version.hpp>
|
||||||
#ifndef CV_VERSION_EPOCH
|
#ifndef CV_VERSION_EPOCH
|
||||||
#include "opencv2/videoio/videoio_c.h"
|
#include <opencv2/videoio/videoio_c.h>
|
||||||
#endif
|
#endif
|
||||||
image get_image_from_stream(CvCapture *cap);
|
image get_image_from_stream(CvCapture *cap);
|
||||||
#endif
|
#endif
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
|
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c);
|
fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c);
|
||||||
avgpool_layer l = {0};
|
avgpool_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = AVGPOOL;
|
l.type = AVGPOOL;
|
||||||
l.batch = batch;
|
l.batch = batch;
|
||||||
l.h = h;
|
l.h = h;
|
||||||
@ -17,8 +17,8 @@ avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
|
|||||||
l.outputs = l.out_c;
|
l.outputs = l.out_c;
|
||||||
l.inputs = h*w*c;
|
l.inputs = h*w*c;
|
||||||
int output_size = l.outputs * batch;
|
int output_size = l.outputs * batch;
|
||||||
l.output = calloc(output_size, sizeof(float));
|
l.output = (float*)calloc(output_size, sizeof(float));
|
||||||
l.delta = calloc(output_size, sizeof(float));
|
l.delta = (float*)calloc(output_size, sizeof(float));
|
||||||
l.forward = forward_avgpool_layer;
|
l.forward = forward_avgpool_layer;
|
||||||
l.backward = backward_avgpool_layer;
|
l.backward = backward_avgpool_layer;
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
@ -8,6 +8,9 @@
|
|||||||
|
|
||||||
typedef layer avgpool_layer;
|
typedef layer avgpool_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
image get_avgpool_image(avgpool_layer l);
|
image get_avgpool_image(avgpool_layer l);
|
||||||
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c);
|
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c);
|
||||||
void resize_avgpool_layer(avgpool_layer *l, int w, int h);
|
void resize_avgpool_layer(avgpool_layer *l, int w, int h);
|
||||||
@ -19,5 +22,8 @@ void forward_avgpool_layer_gpu(avgpool_layer l, network_state state);
|
|||||||
void backward_avgpool_layer_gpu(avgpool_layer l, network_state state);
|
void backward_avgpool_layer_gpu(avgpool_layer l, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -2,10 +2,8 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "avgpool_layer.h"
|
#include "avgpool_layer.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
|
__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
|
||||||
{
|
{
|
||||||
|
@ -5,29 +5,29 @@
|
|||||||
layer make_batchnorm_layer(int batch, int w, int h, int c)
|
layer make_batchnorm_layer(int batch, int w, int h, int c)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
|
fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
|
||||||
layer layer = {0};
|
layer layer = { (LAYER_TYPE)0 };
|
||||||
layer.type = BATCHNORM;
|
layer.type = BATCHNORM;
|
||||||
layer.batch = batch;
|
layer.batch = batch;
|
||||||
layer.h = layer.out_h = h;
|
layer.h = layer.out_h = h;
|
||||||
layer.w = layer.out_w = w;
|
layer.w = layer.out_w = w;
|
||||||
layer.c = layer.out_c = c;
|
layer.c = layer.out_c = c;
|
||||||
layer.output = calloc(h * w * c * batch, sizeof(float));
|
layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
|
||||||
layer.delta = calloc(h * w * c * batch, sizeof(float));
|
layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
|
||||||
layer.inputs = w*h*c;
|
layer.inputs = w*h*c;
|
||||||
layer.outputs = layer.inputs;
|
layer.outputs = layer.inputs;
|
||||||
|
|
||||||
layer.scales = calloc(c, sizeof(float));
|
layer.scales = (float*)calloc(c, sizeof(float));
|
||||||
layer.scale_updates = calloc(c, sizeof(float));
|
layer.scale_updates = (float*)calloc(c, sizeof(float));
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < c; ++i){
|
for(i = 0; i < c; ++i){
|
||||||
layer.scales[i] = 1;
|
layer.scales[i] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
layer.mean = calloc(c, sizeof(float));
|
layer.mean = (float*)calloc(c, sizeof(float));
|
||||||
layer.variance = calloc(c, sizeof(float));
|
layer.variance = (float*)calloc(c, sizeof(float));
|
||||||
|
|
||||||
layer.rolling_mean = calloc(c, sizeof(float));
|
layer.rolling_mean = (float*)calloc(c, sizeof(float));
|
||||||
layer.rolling_variance = calloc(c, sizeof(float));
|
layer.rolling_variance = (float*)calloc(c, sizeof(float));
|
||||||
|
|
||||||
layer.forward = forward_batchnorm_layer;
|
layer.forward = forward_batchnorm_layer;
|
||||||
layer.backward = backward_batchnorm_layer;
|
layer.backward = backward_batchnorm_layer;
|
||||||
|
@ -5,6 +5,9 @@
|
|||||||
#include "layer.h"
|
#include "layer.h"
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
layer make_batchnorm_layer(int batch, int w, int h, int c);
|
layer make_batchnorm_layer(int batch, int w, int h, int c);
|
||||||
void forward_batchnorm_layer(layer l, network_state state);
|
void forward_batchnorm_layer(layer l, network_state state);
|
||||||
void backward_batchnorm_layer(layer l, network_state state);
|
void backward_batchnorm_layer(layer l, network_state state);
|
||||||
@ -16,4 +19,7 @@ void pull_batchnorm_layer(layer l);
|
|||||||
void push_batchnorm_layer(layer l);
|
void push_batchnorm_layer(layer l);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -34,7 +34,7 @@ void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride,
|
|||||||
|
|
||||||
void flatten(float *x, int size, int layers, int batch, int forward)
|
void flatten(float *x, int size, int layers, int batch, int forward)
|
||||||
{
|
{
|
||||||
float *swap = calloc(size*layers*batch, sizeof(float));
|
float* swap = (float*)calloc(size * layers * batch, sizeof(float));
|
||||||
int i,c,b;
|
int i,c,b;
|
||||||
for(b = 0; b < batch; ++b){
|
for(b = 0; b < batch; ++b){
|
||||||
for(c = 0; c < layers; ++c){
|
for(c = 0; c < layers; ++c){
|
||||||
|
12
src/blas.h
12
src/blas.h
@ -1,5 +1,12 @@
|
|||||||
#ifndef BLAS_H
|
#ifndef BLAS_H
|
||||||
#define BLAS_H
|
#define BLAS_H
|
||||||
|
#ifdef GPU
|
||||||
|
#include "cuda.h"
|
||||||
|
#include "tree.h"
|
||||||
|
#endif
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void flatten(float *x, int size, int layers, int batch, int forward);
|
void flatten(float *x, int size, int layers, int batch, int forward);
|
||||||
void pm(int M, int N, float *A);
|
void pm(int M, int N, float *A);
|
||||||
float *random_matrix(int rows, int cols);
|
float *random_matrix(int rows, int cols);
|
||||||
@ -41,8 +48,6 @@ void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, i
|
|||||||
void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
#include "cuda.h"
|
|
||||||
#include "tree.h"
|
|
||||||
|
|
||||||
void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
|
void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
|
||||||
void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
|
void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
|
||||||
@ -97,5 +102,8 @@ void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int for
|
|||||||
|
|
||||||
void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier);
|
void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -3,12 +3,10 @@
|
|||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "blas.h"
|
#include "blas.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "tree.h"
|
#include "tree.h"
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void scale_bias_kernel(float *output, float *biases, int n, int size)
|
__global__ void scale_bias_kernel(float *output, float *biases, int n, int size)
|
||||||
{
|
{
|
||||||
|
@ -249,7 +249,7 @@ int nms_comparator(const void *pa, const void *pb)
|
|||||||
void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh)
|
void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh)
|
||||||
{
|
{
|
||||||
int i, j, k;
|
int i, j, k;
|
||||||
sortable_bbox *s = calloc(total, sizeof(sortable_bbox));
|
sortable_bbox* s = (sortable_bbox*)calloc(total, sizeof(sortable_bbox));
|
||||||
|
|
||||||
for(i = 0; i < total; ++i){
|
for(i = 0; i < total; ++i){
|
||||||
s[i].index = i;
|
s[i].index = i;
|
||||||
|
@ -27,6 +27,9 @@ typedef struct detection_with_class {
|
|||||||
int best_class;
|
int best_class;
|
||||||
} detection_with_class;
|
} detection_with_class;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
box float_to_box(float *f);
|
box float_to_box(float *f);
|
||||||
float box_iou(box a, box b);
|
float box_iou(box a, box b);
|
||||||
float box_rmse(box a, box b);
|
float box_rmse(box a, box b);
|
||||||
@ -42,4 +45,7 @@ box encode_box(box b, box anchor);
|
|||||||
// Return number of selected detections in *selected_detections_num
|
// Return number of selected detections in *selected_detections_num
|
||||||
detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names);
|
detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -41,11 +41,11 @@ void train_captcha(char *cfgfile, char *weightfile)
|
|||||||
int i = *net.seen/imgs;
|
int i = *net.seen/imgs;
|
||||||
int solved = 1;
|
int solved = 1;
|
||||||
list *plist;
|
list *plist;
|
||||||
char **labels = get_labels("/data/captcha/reimgs.labels.list");
|
char** labels = get_labels("data/captcha/reimgs.labels.list");
|
||||||
if (solved){
|
if (solved){
|
||||||
plist = get_paths("/data/captcha/reimgs.solved.list");
|
plist = get_paths("data/captcha/reimgs.solved.list");
|
||||||
}else{
|
}else{
|
||||||
plist = get_paths("/data/captcha/reimgs.raw.list");
|
plist = get_paths("data/captcha/reimgs.raw.list");
|
||||||
}
|
}
|
||||||
char **paths = (char **)list_to_array(plist);
|
char **paths = (char **)list_to_array(plist);
|
||||||
printf("%d\n", plist->size);
|
printf("%d\n", plist->size);
|
||||||
@ -89,7 +89,7 @@ void train_captcha(char *cfgfile, char *weightfile)
|
|||||||
free_data(train);
|
free_data(train);
|
||||||
if(i%100==0){
|
if(i%100==0){
|
||||||
char buff[256];
|
char buff[256];
|
||||||
sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);
|
sprintf(buff, "imagenet_backup/%s_%d.weights", base, i);
|
||||||
save_weights(net, buff);
|
save_weights(net, buff);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -104,7 +104,7 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
|
|||||||
set_batch_network(&net, 1);
|
set_batch_network(&net, 1);
|
||||||
srand(2222222);
|
srand(2222222);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
char **names = get_labels("/data/captcha/reimgs.labels.list");
|
char** names = get_labels("data/captcha/reimgs.labels.list");
|
||||||
char buff[256];
|
char buff[256];
|
||||||
char *input = buff;
|
char *input = buff;
|
||||||
int indexes[26];
|
int indexes[26];
|
||||||
@ -137,12 +137,12 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
|
|||||||
|
|
||||||
void valid_captcha(char *cfgfile, char *weightfile, char *filename)
|
void valid_captcha(char *cfgfile, char *weightfile, char *filename)
|
||||||
{
|
{
|
||||||
char **labels = get_labels("/data/captcha/reimgs.labels.list");
|
char** labels = get_labels("data/captcha/reimgs.labels.list");
|
||||||
network net = parse_network_cfg(cfgfile);
|
network net = parse_network_cfg(cfgfile);
|
||||||
if(weightfile){
|
if(weightfile){
|
||||||
load_weights(&net, weightfile);
|
load_weights(&net, weightfile);
|
||||||
}
|
}
|
||||||
list *plist = get_paths("/data/captcha/reimgs.fg.list");
|
list* plist = get_paths("data/captcha/reimgs.fg.list");
|
||||||
char **paths = (char **)list_to_array(plist);
|
char **paths = (char **)list_to_array(plist);
|
||||||
int N = plist->size;
|
int N = plist->size;
|
||||||
int outputs = net.outputs;
|
int outputs = net.outputs;
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
#include "blas.h"
|
#include "blas.h"
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void train_cifar(char *cfgfile, char *weightfile)
|
void train_cifar(char *cfgfile, char *weightfile)
|
||||||
@ -20,7 +20,7 @@ void train_cifar(char *cfgfile, char *weightfile)
|
|||||||
}
|
}
|
||||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||||
|
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char* backup_directory = "backup/";
|
||||||
int classes = 10;
|
int classes = 10;
|
||||||
int N = 50000;
|
int N = 50000;
|
||||||
|
|
||||||
@ -68,7 +68,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile)
|
|||||||
}
|
}
|
||||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||||
|
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char* backup_directory = "backup/";
|
||||||
int classes = 10;
|
int classes = 10;
|
||||||
int N = 50000;
|
int N = 50000;
|
||||||
|
|
||||||
|
@ -8,20 +8,18 @@
|
|||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <winsock.h>
|
|
||||||
#include "gettimeofday.h"
|
#include "gettimeofday.h"
|
||||||
#else
|
#else
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#include "opencv2/core/version.hpp"
|
#include <opencv2/core/version.hpp>
|
||||||
#ifndef CV_VERSION_EPOCH
|
#ifndef CV_VERSION_EPOCH
|
||||||
#include "opencv2/videoio/videoio_c.h"
|
#include <opencv2/videoio/videoio_c.h>
|
||||||
#endif
|
#endif
|
||||||
image get_image_from_stream(CvCapture *cap);
|
#include "image.h"
|
||||||
image get_image_from_stream_cpp(CvCapture *cap);
|
|
||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
|
IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
|
||||||
|
|
||||||
@ -34,7 +32,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
|
|||||||
|
|
||||||
float *get_regression_values(char **labels, int n)
|
float *get_regression_values(char **labels, int n)
|
||||||
{
|
{
|
||||||
float *v = calloc(n, sizeof(float));
|
float* v = (float*)calloc(n, sizeof(float));
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
char *p = strchr(labels[i], ' ');
|
char *p = strchr(labels[i], ' ');
|
||||||
@ -52,7 +50,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
printf("%d\n", ngpus);
|
printf("%d\n", ngpus);
|
||||||
network *nets = calloc(ngpus, sizeof(network));
|
network* nets = (network*)calloc(ngpus, sizeof(network));
|
||||||
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
int seed = rand();
|
int seed = rand();
|
||||||
@ -431,7 +429,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
|
|||||||
|
|
||||||
float avg_acc = 0;
|
float avg_acc = 0;
|
||||||
float avg_topk = 0;
|
float avg_topk = 0;
|
||||||
int *indexes = calloc(topk, sizeof(int));
|
int* indexes = (int*)calloc(topk, sizeof(int));
|
||||||
|
|
||||||
for(i = 0; i < m; ++i){
|
for(i = 0; i < m; ++i){
|
||||||
int class_id = -1;
|
int class_id = -1;
|
||||||
@ -458,7 +456,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
|
|||||||
images[7] = crop_image(im, 0, 0, w, h);
|
images[7] = crop_image(im, 0, 0, w, h);
|
||||||
images[8] = crop_image(im, -shift, shift, w, h);
|
images[8] = crop_image(im, -shift, shift, w, h);
|
||||||
images[9] = crop_image(im, shift, shift, w, h);
|
images[9] = crop_image(im, shift, shift, w, h);
|
||||||
float *pred = calloc(classes, sizeof(float));
|
float* pred = (float*)calloc(classes, sizeof(float));
|
||||||
for(j = 0; j < 10; ++j){
|
for(j = 0; j < 10; ++j){
|
||||||
float *p = network_predict(net, images[j].data);
|
float *p = network_predict(net, images[j].data);
|
||||||
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
|
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
|
||||||
@ -504,7 +502,7 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
|
|||||||
|
|
||||||
float avg_acc = 0;
|
float avg_acc = 0;
|
||||||
float avg_topk = 0;
|
float avg_topk = 0;
|
||||||
int *indexes = calloc(topk, sizeof(int));
|
int* indexes = (int*)calloc(topk, sizeof(int));
|
||||||
|
|
||||||
int size = net.w;
|
int size = net.w;
|
||||||
for(i = 0; i < m; ++i){
|
for(i = 0; i < m; ++i){
|
||||||
@ -581,7 +579,7 @@ float validate_classifier_single(char *datacfg, char *filename, char *weightfile
|
|||||||
|
|
||||||
float avg_acc = 0;
|
float avg_acc = 0;
|
||||||
float avg_topk = 0;
|
float avg_topk = 0;
|
||||||
int *indexes = calloc(topk, sizeof(int));
|
int* indexes = (int*)calloc(topk, sizeof(int));
|
||||||
|
|
||||||
for(i = 0; i < m; ++i){
|
for(i = 0; i < m; ++i){
|
||||||
int class_id = -1;
|
int class_id = -1;
|
||||||
@ -651,7 +649,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
|
|||||||
|
|
||||||
float avg_acc = 0;
|
float avg_acc = 0;
|
||||||
float avg_topk = 0;
|
float avg_topk = 0;
|
||||||
int *indexes = calloc(topk, sizeof(int));
|
int* indexes = (int*)calloc(topk, sizeof(int));
|
||||||
|
|
||||||
for(i = 0; i < m; ++i){
|
for(i = 0; i < m; ++i){
|
||||||
int class_id = -1;
|
int class_id = -1;
|
||||||
@ -662,7 +660,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
float *pred = calloc(classes, sizeof(float));
|
float* pred = (float*)calloc(classes, sizeof(float));
|
||||||
image im = load_image_color(paths[i], 0, 0);
|
image im = load_image_color(paths[i], 0, 0);
|
||||||
for(j = 0; j < nscales; ++j){
|
for(j = 0; j < nscales; ++j){
|
||||||
image r = resize_min(im, scales[j]);
|
image r = resize_min(im, scales[j]);
|
||||||
@ -707,7 +705,7 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena
|
|||||||
int i = 0;
|
int i = 0;
|
||||||
char **names = get_labels(name_list);
|
char **names = get_labels(name_list);
|
||||||
clock_t time;
|
clock_t time;
|
||||||
int *indexes = calloc(top, sizeof(int));
|
int* indexes = (int*)calloc(top, sizeof(int));
|
||||||
char buff[256];
|
char buff[256];
|
||||||
char *input = buff;
|
char *input = buff;
|
||||||
while(1){
|
while(1){
|
||||||
@ -790,7 +788,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
|
|||||||
int i = 0;
|
int i = 0;
|
||||||
char **names = get_labels(name_list);
|
char **names = get_labels(name_list);
|
||||||
clock_t time;
|
clock_t time;
|
||||||
int *indexes = calloc(top, sizeof(int));
|
int* indexes = (int*)calloc(top, sizeof(int));
|
||||||
char buff[256];
|
char buff[256];
|
||||||
char *input = buff;
|
char *input = buff;
|
||||||
int size = net.w;
|
int size = net.w;
|
||||||
@ -973,7 +971,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
|
|||||||
char *name_list = option_find_str(options, "names", 0);
|
char *name_list = option_find_str(options, "names", 0);
|
||||||
char **names = get_labels(name_list);
|
char **names = get_labels(name_list);
|
||||||
|
|
||||||
int *indexes = calloc(top, sizeof(int));
|
int* indexes = (int*)calloc(top, sizeof(int));
|
||||||
|
|
||||||
if(!cap) error("Couldn't connect to webcam.\n");
|
if(!cap) error("Couldn't connect to webcam.\n");
|
||||||
//cvNamedWindow("Threat", CV_WINDOW_NORMAL);
|
//cvNamedWindow("Threat", CV_WINDOW_NORMAL);
|
||||||
@ -1051,11 +1049,13 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
|
|||||||
}
|
}
|
||||||
top_predictions(net, top, indexes);
|
top_predictions(net, top, indexes);
|
||||||
char buff[256];
|
char buff[256];
|
||||||
sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count);
|
sprintf(buff, "tmp/threat_%06d", count);
|
||||||
//save_image(out, buff);
|
//save_image(out, buff);
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
printf("\033[2J");
|
printf("\033[2J");
|
||||||
printf("\033[1;1H");
|
printf("\033[1;1H");
|
||||||
|
#endif
|
||||||
printf("\nFPS:%.0f\n",fps);
|
printf("\nFPS:%.0f\n",fps);
|
||||||
|
|
||||||
for(i = 0; i < top; ++i){
|
for(i = 0; i < top; ++i){
|
||||||
@ -1111,7 +1111,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
|
|||||||
char *name_list = option_find_str(options, "names", 0);
|
char *name_list = option_find_str(options, "names", 0);
|
||||||
char **names = get_labels(name_list);
|
char **names = get_labels(name_list);
|
||||||
|
|
||||||
int *indexes = calloc(top, sizeof(int));
|
int* indexes = (int*)calloc(top, sizeof(int));
|
||||||
|
|
||||||
if(!cap) error("Couldn't connect to webcam.\n");
|
if(!cap) error("Couldn't connect to webcam.\n");
|
||||||
cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL);
|
cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL);
|
||||||
@ -1193,7 +1193,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
|
|||||||
char *name_list = option_find_str(options, "names", 0);
|
char *name_list = option_find_str(options, "names", 0);
|
||||||
char **names = get_labels(name_list);
|
char **names = get_labels(name_list);
|
||||||
|
|
||||||
int *indexes = calloc(top, sizeof(int));
|
int* indexes = (int*)calloc(top, sizeof(int));
|
||||||
|
|
||||||
if(!cap) error("Couldn't connect to webcam.\n");
|
if(!cap) error("Couldn't connect to webcam.\n");
|
||||||
cvNamedWindow("Classifier", CV_WINDOW_NORMAL);
|
cvNamedWindow("Classifier", CV_WINDOW_NORMAL);
|
||||||
@ -1214,8 +1214,10 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
|
|||||||
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1);
|
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1);
|
||||||
top_predictions(net, top, indexes);
|
top_predictions(net, top, indexes);
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
printf("\033[2J");
|
printf("\033[2J");
|
||||||
printf("\033[1;1H");
|
printf("\033[1;1H");
|
||||||
|
#endif
|
||||||
printf("\nFPS:%.0f\n",fps);
|
printf("\nFPS:%.0f\n",fps);
|
||||||
|
|
||||||
for(i = 0; i < top; ++i){
|
for(i = 0; i < top; ++i){
|
||||||
@ -1257,7 +1259,7 @@ void run_classifier(int argc, char **argv)
|
|||||||
for(i = 0; i < len; ++i){
|
for(i = 0; i < len; ++i){
|
||||||
if (gpu_list[i] == ',') ++ngpus;
|
if (gpu_list[i] == ',') ++ngpus;
|
||||||
}
|
}
|
||||||
gpus = calloc(ngpus, sizeof(int));
|
gpus = (int*)calloc(ngpus, sizeof(int));
|
||||||
for(i = 0; i < ngpus; ++i){
|
for(i = 0; i < ngpus; ++i){
|
||||||
gpus[i] = atoi(gpu_list);
|
gpus[i] = atoi(gpu_list);
|
||||||
gpu_list = strchr(gpu_list, ',')+1;
|
gpu_list = strchr(gpu_list, ',')+1;
|
||||||
|
@ -1,2 +1,12 @@
|
|||||||
|
#ifndef CLASSIFIER_H
|
||||||
|
#define CLASSIFIER_H
|
||||||
|
|
||||||
|
#include "list.h"
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
list *read_data_cfg(char *filename);
|
list *read_data_cfg(char *filename);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
36
src/coco.c
36
src/coco.c
@ -9,7 +9,7 @@
|
|||||||
#include "demo.h"
|
#include "demo.h"
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};
|
char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};
|
||||||
@ -22,7 +22,7 @@ void train_coco(char *cfgfile, char *weightfile)
|
|||||||
//char *train_images = "/home/pjreddie/data/coco/train.txt";
|
//char *train_images = "/home/pjreddie/data/coco/train.txt";
|
||||||
char *train_images = "data/coco.trainval.txt";
|
char *train_images = "data/coco.trainval.txt";
|
||||||
//char *train_images = "data/bags.train.list";
|
//char *train_images = "data/bags.train.list";
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char* backup_directory = "backup/";
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
@ -164,9 +164,9 @@ void validate_coco(char *cfgfile, char *weightfile)
|
|||||||
FILE *fp = fopen(buff, "w");
|
FILE *fp = fopen(buff, "w");
|
||||||
fprintf(fp, "[\n");
|
fprintf(fp, "[\n");
|
||||||
|
|
||||||
box *boxes = calloc(side*side*l.n, sizeof(box));
|
box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
|
||||||
float **probs = calloc(side*side*l.n, sizeof(float *));
|
float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
|
||||||
for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
|
||||||
|
|
||||||
int m = plist->size;
|
int m = plist->size;
|
||||||
int i=0;
|
int i=0;
|
||||||
@ -177,11 +177,11 @@ void validate_coco(char *cfgfile, char *weightfile)
|
|||||||
float iou_thresh = .5;
|
float iou_thresh = .5;
|
||||||
|
|
||||||
int nthreads = 8;
|
int nthreads = 8;
|
||||||
image *val = calloc(nthreads, sizeof(image));
|
image* val = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *val_resized = calloc(nthreads, sizeof(image));
|
image* val_resized = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *buf = calloc(nthreads, sizeof(image));
|
image* buf = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *buf_resized = calloc(nthreads, sizeof(image));
|
image* buf_resized = (image*)calloc(nthreads, sizeof(image));
|
||||||
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
|
pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
|
||||||
|
|
||||||
load_args args = {0};
|
load_args args = {0};
|
||||||
args.w = net.w;
|
args.w = net.w;
|
||||||
@ -240,7 +240,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
|
|||||||
srand(time(0));
|
srand(time(0));
|
||||||
|
|
||||||
char *base = "results/comp4_det_test_";
|
char *base = "results/comp4_det_test_";
|
||||||
list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt");
|
list* plist = get_paths("data/voc/test/2007_test.txt");
|
||||||
char **paths = (char **)list_to_array(plist);
|
char **paths = (char **)list_to_array(plist);
|
||||||
|
|
||||||
layer l = net.layers[net.n-1];
|
layer l = net.layers[net.n-1];
|
||||||
@ -248,15 +248,15 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
|
|||||||
int side = l.side;
|
int side = l.side;
|
||||||
|
|
||||||
int j, k;
|
int j, k;
|
||||||
FILE **fps = calloc(classes, sizeof(FILE *));
|
FILE** fps = (FILE**)calloc(classes, sizeof(FILE*));
|
||||||
for(j = 0; j < classes; ++j){
|
for(j = 0; j < classes; ++j){
|
||||||
char buff[1024];
|
char buff[1024];
|
||||||
snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]);
|
snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]);
|
||||||
fps[j] = fopen(buff, "w");
|
fps[j] = fopen(buff, "w");
|
||||||
}
|
}
|
||||||
box *boxes = calloc(side*side*l.n, sizeof(box));
|
box* boxes = (box*)calloc(side * side * l.n, sizeof(box));
|
||||||
float **probs = calloc(side*side*l.n, sizeof(float *));
|
float** probs = (float**)calloc(side * side * l.n, sizeof(float*));
|
||||||
for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
for(j = 0; j < side*side*l.n; ++j) probs[j] = (float*)calloc(classes, sizeof(float));
|
||||||
|
|
||||||
int m = plist->size;
|
int m = plist->size;
|
||||||
int i=0;
|
int i=0;
|
||||||
@ -328,9 +328,9 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
|
|||||||
char buff[256];
|
char buff[256];
|
||||||
char *input = buff;
|
char *input = buff;
|
||||||
int j;
|
int j;
|
||||||
box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
|
box* boxes = (box*)calloc(l.side * l.side * l.n, sizeof(box));
|
||||||
float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
|
float** probs = (float**)calloc(l.side * l.side * l.n, sizeof(float*));
|
||||||
for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
|
for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
|
||||||
while(1){
|
while(1){
|
||||||
if(filename){
|
if(filename){
|
||||||
strncpy(input, filename, 256);
|
strncpy(input, filename, 256);
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include "col2im.h"
|
||||||
void col2im_add_pixel(float *im, int height, int width, int channels,
|
void col2im_add_pixel(float *im, int height, int width, int channels,
|
||||||
int row, int col, int channel, int pad, float val)
|
int row, int col, int channel, int pad, float val)
|
||||||
{
|
{
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
#ifndef COL2IM_H
|
#ifndef COL2IM_H
|
||||||
#define COL2IM_H
|
#define COL2IM_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void col2im_cpu(float* data_col,
|
void col2im_cpu(float* data_col,
|
||||||
int channels, int height, int width,
|
int channels, int height, int width,
|
||||||
int ksize, int stride, int pad, float* data_im);
|
int ksize, int stride, int pad, float* data_im);
|
||||||
@ -10,4 +13,7 @@ void col2im_ongpu(float *data_col,
|
|||||||
int channels, int height, int width,
|
int channels, int height, int width,
|
||||||
int ksize, int stride, int pad, float *data_im);
|
int ksize, int stride, int pad, float *data_im);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,10 +2,8 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "col2im.h"
|
#include "col2im.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
|
||||||
|
|
||||||
// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
|
// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
|
||||||
// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE
|
// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE
|
||||||
|
@ -12,7 +12,7 @@ void train_compare(char *cfgfile, char *weightfile)
|
|||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char* backup_directory = "backup/";
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
network net = parse_network_cfg(cfgfile);
|
network net = parse_network_cfg(cfgfile);
|
||||||
if(weightfile){
|
if(weightfile){
|
||||||
@ -176,7 +176,7 @@ int bbox_comparator(const void *a, const void *b)
|
|||||||
|
|
||||||
image im1 = load_image_color(box1.filename, net.w, net.h);
|
image im1 = load_image_color(box1.filename, net.w, net.h);
|
||||||
image im2 = load_image_color(box2.filename, net.w, net.h);
|
image im2 = load_image_color(box2.filename, net.w, net.h);
|
||||||
float *X = calloc(net.w*net.h*net.c, sizeof(float));
|
float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
|
||||||
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
|
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
|
||||||
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
|
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
|
||||||
float *predictions = network_predict(net, X);
|
float *predictions = network_predict(net, X);
|
||||||
@ -205,7 +205,7 @@ void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, in
|
|||||||
{
|
{
|
||||||
image im1 = load_image_color(a->filename, net.w, net.h);
|
image im1 = load_image_color(a->filename, net.w, net.h);
|
||||||
image im2 = load_image_color(b->filename, net.w, net.h);
|
image im2 = load_image_color(b->filename, net.w, net.h);
|
||||||
float *X = calloc(net.w*net.h*net.c, sizeof(float));
|
float* X = (float*)calloc(net.w * net.h * net.c, sizeof(float));
|
||||||
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
|
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
|
||||||
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
|
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
|
||||||
float *predictions = network_predict(net, X);
|
float *predictions = network_predict(net, X);
|
||||||
@ -239,7 +239,7 @@ void SortMaster3000(char *filename, char *weightfile)
|
|||||||
char **paths = (char **)list_to_array(plist);
|
char **paths = (char **)list_to_array(plist);
|
||||||
int N = plist->size;
|
int N = plist->size;
|
||||||
free_list(plist);
|
free_list(plist);
|
||||||
sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
|
sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
|
||||||
printf("Sorting %d boxes...\n", N);
|
printf("Sorting %d boxes...\n", N);
|
||||||
for(i = 0; i < N; ++i){
|
for(i = 0; i < N; ++i){
|
||||||
boxes[i].filename = paths[i];
|
boxes[i].filename = paths[i];
|
||||||
@ -274,13 +274,13 @@ void BattleRoyaleWithCheese(char *filename, char *weightfile)
|
|||||||
int N = plist->size;
|
int N = plist->size;
|
||||||
int total = N;
|
int total = N;
|
||||||
free_list(plist);
|
free_list(plist);
|
||||||
sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
|
sortable_bbox* boxes = (sortable_bbox*)calloc(N, sizeof(sortable_bbox));
|
||||||
printf("Battling %d boxes...\n", N);
|
printf("Battling %d boxes...\n", N);
|
||||||
for(i = 0; i < N; ++i){
|
for(i = 0; i < N; ++i){
|
||||||
boxes[i].filename = paths[i];
|
boxes[i].filename = paths[i];
|
||||||
boxes[i].net = net;
|
boxes[i].net = net;
|
||||||
boxes[i].classes = classes;
|
boxes[i].classes = classes;
|
||||||
boxes[i].elos = calloc(classes, sizeof(float));;
|
boxes[i].elos = (float*)calloc(classes, sizeof(float));
|
||||||
for(j = 0; j < classes; ++j){
|
for(j = 0; j < classes; ++j){
|
||||||
boxes[i].elos[j] = 1500;
|
boxes[i].elos[j] = 1500;
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,7 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
|
|||||||
{
|
{
|
||||||
int total_batch = batch*steps;
|
int total_batch = batch*steps;
|
||||||
int i;
|
int i;
|
||||||
connected_layer l = {0};
|
connected_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = CONNECTED;
|
l.type = CONNECTED;
|
||||||
|
|
||||||
l.inputs = inputs;
|
l.inputs = inputs;
|
||||||
@ -74,14 +74,14 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
|
|||||||
l.activation = activation;
|
l.activation = activation;
|
||||||
l.learning_rate_scale = 1;
|
l.learning_rate_scale = 1;
|
||||||
|
|
||||||
l.output = calloc(total_batch*outputs, sizeof(float));
|
l.output = (float*)calloc(total_batch * outputs, sizeof(float));
|
||||||
l.delta = calloc(total_batch*outputs, sizeof(float));
|
l.delta = (float*)calloc(total_batch * outputs, sizeof(float));
|
||||||
|
|
||||||
l.weight_updates = calloc(inputs*outputs, sizeof(float));
|
l.weight_updates = (float*)calloc(inputs * outputs, sizeof(float));
|
||||||
l.bias_updates = calloc(outputs, sizeof(float));
|
l.bias_updates = (float*)calloc(outputs, sizeof(float));
|
||||||
|
|
||||||
l.weights = calloc(outputs*inputs, sizeof(float));
|
l.weights = (float*)calloc(outputs * inputs, sizeof(float));
|
||||||
l.biases = calloc(outputs, sizeof(float));
|
l.biases = (float*)calloc(outputs, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_connected_layer;
|
l.forward = forward_connected_layer;
|
||||||
l.backward = backward_connected_layer;
|
l.backward = backward_connected_layer;
|
||||||
@ -98,22 +98,22 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(batch_normalize){
|
if(batch_normalize){
|
||||||
l.scales = calloc(outputs, sizeof(float));
|
l.scales = (float*)calloc(outputs, sizeof(float));
|
||||||
l.scale_updates = calloc(outputs, sizeof(float));
|
l.scale_updates = (float*)calloc(outputs, sizeof(float));
|
||||||
for(i = 0; i < outputs; ++i){
|
for(i = 0; i < outputs; ++i){
|
||||||
l.scales[i] = 1;
|
l.scales[i] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
l.mean = calloc(outputs, sizeof(float));
|
l.mean = (float*)calloc(outputs, sizeof(float));
|
||||||
l.mean_delta = calloc(outputs, sizeof(float));
|
l.mean_delta = (float*)calloc(outputs, sizeof(float));
|
||||||
l.variance = calloc(outputs, sizeof(float));
|
l.variance = (float*)calloc(outputs, sizeof(float));
|
||||||
l.variance_delta = calloc(outputs, sizeof(float));
|
l.variance_delta = (float*)calloc(outputs, sizeof(float));
|
||||||
|
|
||||||
l.rolling_mean = calloc(outputs, sizeof(float));
|
l.rolling_mean = (float*)calloc(outputs, sizeof(float));
|
||||||
l.rolling_variance = calloc(outputs, sizeof(float));
|
l.rolling_variance = (float*)calloc(outputs, sizeof(float));
|
||||||
|
|
||||||
l.x = calloc(total_batch*outputs, sizeof(float));
|
l.x = (float*)calloc(total_batch * outputs, sizeof(float));
|
||||||
l.x_norm = calloc(total_batch*outputs, sizeof(float));
|
l.x_norm = (float*)calloc(total_batch * outputs, sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
@ -7,7 +7,11 @@
|
|||||||
|
|
||||||
typedef layer connected_layer;
|
typedef layer connected_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
connected_layer make_connected_layer(int batch, int steps, int inputs, int outputs, ACTIVATION activation, int batch_normalize);
|
connected_layer make_connected_layer(int batch, int steps, int inputs, int outputs, ACTIVATION activation, int batch_normalize);
|
||||||
|
size_t get_connected_workspace_size(layer l);
|
||||||
|
|
||||||
void forward_connected_layer(connected_layer layer, network_state state);
|
void forward_connected_layer(connected_layer layer, network_state state);
|
||||||
void backward_connected_layer(connected_layer layer, network_state state);
|
void backward_connected_layer(connected_layer layer, network_state state);
|
||||||
@ -23,5 +27,8 @@ void push_connected_layer(connected_layer layer);
|
|||||||
void pull_connected_layer(connected_layer layer);
|
void pull_connected_layer(connected_layer layer);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -3,10 +3,11 @@
|
|||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
|
#ifndef USE_CMAKE_LIBS
|
||||||
#pragma comment(lib, "cudnn.lib")
|
#pragma comment(lib, "cudnn.lib")
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "convolutional_layer.h"
|
#include "convolutional_layer.h"
|
||||||
#include "batchnorm_layer.h"
|
#include "batchnorm_layer.h"
|
||||||
#include "gemm.h"
|
#include "gemm.h"
|
||||||
@ -15,7 +16,7 @@ extern "C" {
|
|||||||
#include "col2im.h"
|
#include "col2im.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void binarize_kernel(float *x, int n, float *binary)
|
__global__ void binarize_kernel(float *x, int n, float *binary)
|
||||||
{
|
{
|
||||||
@ -73,7 +74,6 @@ void binarize_weights_gpu(float *weights, int n, int size, float *binary)
|
|||||||
CHECK_CUDA(cudaPeekAtLastError());
|
CHECK_CUDA(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
#define WARP_SIZE 32
|
|
||||||
|
|
||||||
__global__ void set_zero_kernel(float *src, int size)
|
__global__ void set_zero_kernel(float *src, int size)
|
||||||
{
|
{
|
||||||
@ -477,10 +477,10 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
|||||||
simple_copy_ongpu(l.outputs*l.batch / 2, output16, l.x_gpu);
|
simple_copy_ongpu(l.outputs*l.batch / 2, output16, l.x_gpu);
|
||||||
//copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1);
|
//copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1);
|
||||||
//cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream());
|
//cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream());
|
||||||
float one = 1;
|
float one = 1.0f;
|
||||||
float zero = 0;
|
float zero = 0.0f;
|
||||||
// Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth
|
// Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth
|
||||||
// compared to FP32, it<EFBFBD>s just that the statistics and value adjustment should be done in FP32.
|
// compared to FP32, it's just that the statistics and value adjustment should be done in FP32.
|
||||||
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(cudnn_handle(),
|
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(cudnn_handle(),
|
||||||
CUDNN_BATCHNORM_SPATIAL,
|
CUDNN_BATCHNORM_SPATIAL,
|
||||||
&one,
|
&one,
|
||||||
@ -639,8 +639,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
|
|||||||
// l.mean_gpu = l.rolling_mean_gpu;
|
// l.mean_gpu = l.rolling_mean_gpu;
|
||||||
// l.variance_gpu = l.rolling_variance_gpu;
|
// l.variance_gpu = l.rolling_variance_gpu;
|
||||||
//}
|
//}
|
||||||
float one = 1;
|
float one = 1.0f;
|
||||||
float zero = 0;
|
float zero = 0.0f;
|
||||||
CHECK_CUDNN(cudnnBatchNormalizationBackward(cudnn_handle(),
|
CHECK_CUDNN(cudnnBatchNormalizationBackward(cudnn_handle(),
|
||||||
CUDNN_BATCHNORM_SPATIAL,
|
CUDNN_BATCHNORM_SPATIAL,
|
||||||
&one,
|
&one,
|
||||||
@ -936,4 +936,3 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -9,8 +9,10 @@
|
|||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
|
#ifndef USE_CMAKE_LIBS
|
||||||
#pragma comment(lib, "cudnn.lib")
|
#pragma comment(lib, "cudnn.lib")
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef AI2
|
#ifdef AI2
|
||||||
#include "xnor_layer.h"
|
#include "xnor_layer.h"
|
||||||
@ -288,7 +290,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
|
|||||||
l->weightDesc,
|
l->weightDesc,
|
||||||
l->convDesc,
|
l->convDesc,
|
||||||
l->dstTensorDesc,
|
l->dstTensorDesc,
|
||||||
forward_algo,
|
(cudnnConvolutionFwdPreference_t)forward_algo,
|
||||||
0,
|
0,
|
||||||
&l->fw_algo));
|
&l->fw_algo));
|
||||||
CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
|
CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
|
||||||
@ -296,7 +298,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
|
|||||||
l->ddstTensorDesc,
|
l->ddstTensorDesc,
|
||||||
l->convDesc,
|
l->convDesc,
|
||||||
l->dsrcTensorDesc,
|
l->dsrcTensorDesc,
|
||||||
backward_algo,
|
(cudnnConvolutionBwdDataPreference_t)backward_algo,
|
||||||
0,
|
0,
|
||||||
&l->bd_algo));
|
&l->bd_algo));
|
||||||
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
|
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
|
||||||
@ -304,7 +306,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
|
|||||||
l->ddstTensorDesc,
|
l->ddstTensorDesc,
|
||||||
l->convDesc,
|
l->convDesc,
|
||||||
l->dweightDesc,
|
l->dweightDesc,
|
||||||
backward_filter,
|
(cudnnConvolutionBwdFilterPreference_t)backward_filter,
|
||||||
0,
|
0,
|
||||||
&l->bf_algo));
|
&l->bf_algo));
|
||||||
|
|
||||||
@ -328,7 +330,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
|||||||
{
|
{
|
||||||
int total_batch = batch*steps;
|
int total_batch = batch*steps;
|
||||||
int i;
|
int i;
|
||||||
convolutional_layer l = {0};
|
convolutional_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = CONVOLUTIONAL;
|
l.type = CONVOLUTIONAL;
|
||||||
|
|
||||||
l.index = index;
|
l.index = index;
|
||||||
@ -346,11 +348,11 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
|||||||
l.batch_normalize = batch_normalize;
|
l.batch_normalize = batch_normalize;
|
||||||
l.learning_rate_scale = 1;
|
l.learning_rate_scale = 1;
|
||||||
|
|
||||||
l.weights = calloc(c*n*size*size, sizeof(float));
|
l.weights = (float*)calloc(c * n * size * size, sizeof(float));
|
||||||
l.weight_updates = calloc(c*n*size*size, sizeof(float));
|
l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
|
||||||
|
|
||||||
l.biases = calloc(n, sizeof(float));
|
l.biases = (float*)calloc(n, sizeof(float));
|
||||||
l.bias_updates = calloc(n, sizeof(float));
|
l.bias_updates = (float*)calloc(n, sizeof(float));
|
||||||
|
|
||||||
// float scale = 1./sqrt(size*size*c);
|
// float scale = 1./sqrt(size*size*c);
|
||||||
float scale = sqrt(2./(size*size*c));
|
float scale = sqrt(2./(size*size*c));
|
||||||
@ -364,64 +366,64 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
|||||||
l.inputs = l.w * l.h * l.c;
|
l.inputs = l.w * l.h * l.c;
|
||||||
l.activation = activation;
|
l.activation = activation;
|
||||||
|
|
||||||
l.output = calloc(total_batch*l.outputs, sizeof(float));
|
l.output = (float*)calloc(total_batch*l.outputs, sizeof(float));
|
||||||
l.delta = calloc(total_batch*l.outputs, sizeof(float));
|
l.delta = (float*)calloc(total_batch*l.outputs, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_convolutional_layer;
|
l.forward = forward_convolutional_layer;
|
||||||
l.backward = backward_convolutional_layer;
|
l.backward = backward_convolutional_layer;
|
||||||
l.update = update_convolutional_layer;
|
l.update = update_convolutional_layer;
|
||||||
if(binary){
|
if(binary){
|
||||||
l.binary_weights = calloc(c*n*size*size, sizeof(float));
|
l.binary_weights = (float*)calloc(c * n * size * size, sizeof(float));
|
||||||
l.cweights = calloc(c*n*size*size, sizeof(char));
|
l.cweights = (char*)calloc(c * n * size * size, sizeof(char));
|
||||||
l.scales = calloc(n, sizeof(float));
|
l.scales = (float*)calloc(n, sizeof(float));
|
||||||
}
|
}
|
||||||
if(xnor){
|
if(xnor){
|
||||||
l.binary_weights = calloc(c*n*size*size, sizeof(float));
|
l.binary_weights = (float*)calloc(c * n * size * size, sizeof(float));
|
||||||
l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
|
l.binary_input = (float*)calloc(l.inputs * l.batch, sizeof(float));
|
||||||
|
|
||||||
int align = 32;// 8;
|
int align = 32;// 8;
|
||||||
int src_align = l.out_h*l.out_w;
|
int src_align = l.out_h*l.out_w;
|
||||||
l.bit_align = src_align + (align - src_align % align);
|
l.bit_align = src_align + (align - src_align % align);
|
||||||
|
|
||||||
l.mean_arr = calloc(l.n, sizeof(float));
|
l.mean_arr = (float*)calloc(l.n, sizeof(float));
|
||||||
|
|
||||||
const size_t new_c = l.c / 32;
|
const size_t new_c = l.c / 32;
|
||||||
size_t in_re_packed_input_size = new_c * l.w * l.h + 1;
|
size_t in_re_packed_input_size = new_c * l.w * l.h + 1;
|
||||||
l.bin_re_packed_input = calloc(in_re_packed_input_size, sizeof(uint32_t));
|
l.bin_re_packed_input = (uint32_t*)calloc(in_re_packed_input_size, sizeof(uint32_t));
|
||||||
|
|
||||||
l.lda_align = 256; // AVX2
|
l.lda_align = 256; // AVX2
|
||||||
int k = l.size*l.size*l.c;
|
int k = l.size*l.size*l.c;
|
||||||
size_t k_aligned = k + (l.lda_align - k%l.lda_align);
|
size_t k_aligned = k + (l.lda_align - k%l.lda_align);
|
||||||
size_t t_bit_input_size = k_aligned * l.bit_align / 8;
|
size_t t_bit_input_size = k_aligned * l.bit_align / 8;
|
||||||
l.t_bit_input = calloc(t_bit_input_size, sizeof(char));
|
l.t_bit_input = (char*)calloc(t_bit_input_size, sizeof(char));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(batch_normalize){
|
if(batch_normalize){
|
||||||
l.scales = calloc(n, sizeof(float));
|
l.scales = (float*)calloc(n, sizeof(float));
|
||||||
l.scale_updates = calloc(n, sizeof(float));
|
l.scale_updates = (float*)calloc(n, sizeof(float));
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
l.scales[i] = 1;
|
l.scales[i] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
l.mean = calloc(n, sizeof(float));
|
l.mean = (float*)calloc(n, sizeof(float));
|
||||||
l.variance = calloc(n, sizeof(float));
|
l.variance = (float*)calloc(n, sizeof(float));
|
||||||
|
|
||||||
l.mean_delta = calloc(n, sizeof(float));
|
l.mean_delta = (float*)calloc(n, sizeof(float));
|
||||||
l.variance_delta = calloc(n, sizeof(float));
|
l.variance_delta = (float*)calloc(n, sizeof(float));
|
||||||
|
|
||||||
l.rolling_mean = calloc(n, sizeof(float));
|
l.rolling_mean = (float*)calloc(n, sizeof(float));
|
||||||
l.rolling_variance = calloc(n, sizeof(float));
|
l.rolling_variance = (float*)calloc(n, sizeof(float));
|
||||||
l.x = calloc(total_batch*l.outputs, sizeof(float));
|
l.x = (float*)calloc(total_batch * l.outputs, sizeof(float));
|
||||||
l.x_norm = calloc(total_batch*l.outputs, sizeof(float));
|
l.x_norm = (float*)calloc(total_batch * l.outputs, sizeof(float));
|
||||||
}
|
}
|
||||||
if(adam){
|
if(adam){
|
||||||
l.adam = 1;
|
l.adam = 1;
|
||||||
l.m = calloc(c*n*size*size, sizeof(float));
|
l.m = (float*)calloc(c * n * size * size, sizeof(float));
|
||||||
l.v = calloc(c*n*size*size, sizeof(float));
|
l.v = (float*)calloc(c * n * size * size, sizeof(float));
|
||||||
l.bias_m = calloc(n, sizeof(float));
|
l.bias_m = (float*)calloc(n, sizeof(float));
|
||||||
l.scale_m = calloc(n, sizeof(float));
|
l.scale_m = (float*)calloc(n, sizeof(float));
|
||||||
l.bias_v = calloc(n, sizeof(float));
|
l.bias_v = (float*)calloc(n, sizeof(float));
|
||||||
l.scale_v = calloc(n, sizeof(float));
|
l.scale_v = (float*)calloc(n, sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
@ -549,11 +551,11 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
|
|||||||
l->outputs = l->out_h * l->out_w * l->out_c;
|
l->outputs = l->out_h * l->out_w * l->out_c;
|
||||||
l->inputs = l->w * l->h * l->c;
|
l->inputs = l->w * l->h * l->c;
|
||||||
|
|
||||||
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
|
l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
|
||||||
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
|
l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
|
||||||
if(l->batch_normalize){
|
if(l->batch_normalize){
|
||||||
l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
|
l->x = (float*)realloc(l->x, l->batch * l->outputs * sizeof(float));
|
||||||
l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
|
l->x_norm = (float*)realloc(l->x_norm, l->batch * l->outputs * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (l->xnor) {
|
if (l->xnor) {
|
||||||
@ -642,7 +644,7 @@ void gemm_nn_custom(int M, int N, int K, float ALPHA,
|
|||||||
int i, j, k;
|
int i, j, k;
|
||||||
for (i = 0; i < M; ++i) {
|
for (i = 0; i < M; ++i) {
|
||||||
for (k = 0; k < K; ++k) {
|
for (k = 0; k < K; ++k) {
|
||||||
register float A_PART = ALPHA*A[i*lda + k];
|
float A_PART = ALPHA * A[i * lda + k];
|
||||||
//printf("\n weight = %f \n", A_PART);
|
//printf("\n weight = %f \n", A_PART);
|
||||||
for (j = 0; j < N; ++j) {
|
for (j = 0; j < N; ++j) {
|
||||||
C[i*ldc + j] += A_PART*B[k*ldb + j];
|
C[i*ldc + j] += A_PART*B[k*ldb + j];
|
||||||
@ -695,8 +697,8 @@ void binary_align_weights(convolutional_layer *l)
|
|||||||
|
|
||||||
size_t align_weights_size = new_lda * m;
|
size_t align_weights_size = new_lda * m;
|
||||||
l->align_bit_weights_size = align_weights_size / 8 + 1;
|
l->align_bit_weights_size = align_weights_size / 8 + 1;
|
||||||
float *align_weights = calloc(align_weights_size, sizeof(float));
|
float* align_weights = (float*)calloc(align_weights_size, sizeof(float));
|
||||||
l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char));
|
l->align_bit_weights = (char*)calloc(l->align_bit_weights_size, sizeof(char));
|
||||||
|
|
||||||
size_t i, j;
|
size_t i, j;
|
||||||
// align A without transpose
|
// align A without transpose
|
||||||
@ -739,7 +741,7 @@ void binary_align_weights(convolutional_layer *l)
|
|||||||
//printf("\n l.index = %d \t aw[0] = %f, aw[1] = %f, aw[2] = %f, aw[3] = %f \n", l->index, align_weights[0], align_weights[1], align_weights[2], align_weights[3]);
|
//printf("\n l.index = %d \t aw[0] = %f, aw[1] = %f, aw[2] = %f, aw[3] = %f \n", l->index, align_weights[0], align_weights[1], align_weights[2], align_weights[3]);
|
||||||
//memcpy(l->binary_weights, align_weights, (l->size * l->size * l->c * l->n) * sizeof(float));
|
//memcpy(l->binary_weights, align_weights, (l->size * l->size * l->c * l->n) * sizeof(float));
|
||||||
|
|
||||||
float_to_bit(align_weights, l->align_bit_weights, align_weights_size);
|
float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size);
|
||||||
|
|
||||||
//if (l->n >= 32)
|
//if (l->n >= 32)
|
||||||
if(gpu_index >= 0)
|
if(gpu_index >= 0)
|
||||||
@ -757,7 +759,7 @@ void binary_align_weights(convolutional_layer *l)
|
|||||||
//get_mean_array(l->binary_weights, m*new_lda, l->n, l->mean_arr);
|
//get_mean_array(l->binary_weights, m*new_lda, l->n, l->mean_arr);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
float_to_bit(align_weights, l->align_bit_weights, align_weights_size);
|
float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size);
|
||||||
|
|
||||||
get_mean_array(l->binary_weights, m*k, l->n, l->mean_arr);
|
get_mean_array(l->binary_weights, m*k, l->n, l->mean_arr);
|
||||||
}
|
}
|
||||||
@ -808,7 +810,7 @@ size_t binary_transpose_align_input(int k, int n, float *b, char **t_bit_input,
|
|||||||
// t_bit_input - [new_ldb, n] - [k', n]
|
// t_bit_input - [new_ldb, n] - [k', n]
|
||||||
|
|
||||||
//transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8);
|
//transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8);
|
||||||
transpose_bin(b, *t_bit_input, k, n, bit_align, new_ldb, 8);
|
transpose_bin((uint32_t*)b, (uint32_t*)*t_bit_input, k, n, bit_align, new_ldb, 8);
|
||||||
|
|
||||||
return t_intput_size;
|
return t_intput_size;
|
||||||
}
|
}
|
||||||
@ -874,7 +876,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
|
|||||||
repack_input(state.input, state.workspace, l.w, l.h, l.c);
|
repack_input(state.input, state.workspace, l.w, l.h, l.c);
|
||||||
|
|
||||||
// 32 x floats -> 1 x uint32_t
|
// 32 x floats -> 1 x uint32_t
|
||||||
float_to_bit(state.workspace, (char *)l.bin_re_packed_input, l.c * l.w * l.h);
|
float_to_bit(state.workspace, (unsigned char *)l.bin_re_packed_input, l.c * l.w * l.h);
|
||||||
|
|
||||||
//free(re_packed_input);
|
//free(re_packed_input);
|
||||||
|
|
||||||
@ -900,10 +902,10 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
|
|||||||
|
|
||||||
// // then exit from if()
|
// // then exit from if()
|
||||||
|
|
||||||
transpose_uint32((uint32_t *)state.workspace, l.t_bit_input, new_k, n, n, new_ldb);
|
transpose_uint32((uint32_t *)state.workspace, (uint32_t*)l.t_bit_input, new_k, n, n, new_ldb);
|
||||||
|
|
||||||
// the main GEMM function
|
// the main GEMM function
|
||||||
gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, l.t_bit_input, new_ldb, c, n, l.mean_arr);
|
gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr);
|
||||||
|
|
||||||
// // alternative GEMM
|
// // alternative GEMM
|
||||||
//gemm_nn_bin_transposed_32bit_packed(m, n, new_k, 1,
|
//gemm_nn_bin_transposed_32bit_packed(m, n, new_k, 1,
|
||||||
@ -945,7 +947,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
|
|||||||
size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align);
|
size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align);
|
||||||
|
|
||||||
// 5x times faster than gemm()-float32
|
// 5x times faster than gemm()-float32
|
||||||
gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, l.t_bit_input, new_ldb, c, n, l.mean_arr);
|
gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr);
|
||||||
|
|
||||||
//gemm_nn_custom_bin_mean_transposed(m, n, k, 1, bit_weights, k, t_bit_input, new_ldb, c, n, mean_arr);
|
//gemm_nn_custom_bin_mean_transposed(m, n, k, 1, bit_weights, k, t_bit_input, new_ldb, c, n, mean_arr);
|
||||||
|
|
||||||
@ -1074,7 +1076,7 @@ void rescale_weights(convolutional_layer l, float scale, float trans)
|
|||||||
|
|
||||||
image *get_weights(convolutional_layer l)
|
image *get_weights(convolutional_layer l)
|
||||||
{
|
{
|
||||||
image *weights = calloc(l.n, sizeof(image));
|
image* weights = (image*)calloc(l.n, sizeof(image));
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < l.n; ++i){
|
for(i = 0; i < l.n; ++i){
|
||||||
weights[i] = copy_image(get_convolutional_weight(l, i));
|
weights[i] = copy_image(get_convolutional_weight(l, i));
|
||||||
@ -1097,4 +1099,3 @@ image *visualize_convolutional_layer(convolutional_layer l, char *window, image
|
|||||||
free_image(dc);
|
free_image(dc);
|
||||||
return single_weights;
|
return single_weights;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,6 +9,9 @@
|
|||||||
|
|
||||||
typedef layer convolutional_layer;
|
typedef layer convolutional_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
||||||
void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
||||||
@ -22,11 +25,11 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
|
|||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
void cudnn_convolutional_setup(layer *l, int cudnn_preference);
|
void cudnn_convolutional_setup(layer *l, int cudnn_preference);
|
||||||
void create_convolutional_cudnn_tensors(layer *l);
|
void create_convolutional_cudnn_tensors(layer *l);
|
||||||
size_t get_convolutional_workspace_size(layer l);
|
|
||||||
void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
|
void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
size_t get_convolutional_workspace_size(layer l);
|
||||||
convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index);
|
convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index);
|
||||||
void denormalize_convolutional_layer(convolutional_layer l);
|
void denormalize_convolutional_layer(convolutional_layer l);
|
||||||
void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
|
void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
|
||||||
@ -53,5 +56,8 @@ int convolutional_out_width(convolutional_layer layer);
|
|||||||
void rescale_weights(convolutional_layer l, float scale, float trans);
|
void rescale_weights(convolutional_layer l, float scale, float trans);
|
||||||
void rgbgr_weights(convolutional_layer l);
|
void rgbgr_weights(convolutional_layer l);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -32,7 +32,7 @@ char *get_cost_string(COST_TYPE a)
|
|||||||
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
|
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "cost %4d\n", inputs);
|
fprintf(stderr, "cost %4d\n", inputs);
|
||||||
cost_layer l = {0};
|
cost_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = COST;
|
l.type = COST;
|
||||||
|
|
||||||
l.scale = scale;
|
l.scale = scale;
|
||||||
@ -40,9 +40,9 @@ cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float sca
|
|||||||
l.inputs = inputs;
|
l.inputs = inputs;
|
||||||
l.outputs = inputs;
|
l.outputs = inputs;
|
||||||
l.cost_type = cost_type;
|
l.cost_type = cost_type;
|
||||||
l.delta = calloc(inputs*batch, sizeof(float));
|
l.delta = (float*)calloc(inputs * batch, sizeof(float));
|
||||||
l.output = calloc(inputs*batch, sizeof(float));
|
l.output = (float*)calloc(inputs * batch, sizeof(float));
|
||||||
l.cost = calloc(1, sizeof(float));
|
l.cost = (float*)calloc(1, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_cost_layer;
|
l.forward = forward_cost_layer;
|
||||||
l.backward = backward_cost_layer;
|
l.backward = backward_cost_layer;
|
||||||
@ -60,8 +60,8 @@ void resize_cost_layer(cost_layer *l, int inputs)
|
|||||||
{
|
{
|
||||||
l->inputs = inputs;
|
l->inputs = inputs;
|
||||||
l->outputs = inputs;
|
l->outputs = inputs;
|
||||||
l->delta = realloc(l->delta, inputs*l->batch*sizeof(float));
|
l->delta = (float*)realloc(l->delta, inputs * l->batch * sizeof(float));
|
||||||
l->output = realloc(l->output, inputs*l->batch*sizeof(float));
|
l->output = (float*)realloc(l->output, inputs * l->batch * sizeof(float));
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
cuda_free(l->delta_gpu);
|
cuda_free(l->delta_gpu);
|
||||||
cuda_free(l->output_gpu);
|
cuda_free(l->output_gpu);
|
||||||
|
@ -5,6 +5,9 @@
|
|||||||
|
|
||||||
typedef layer cost_layer;
|
typedef layer cost_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
COST_TYPE get_cost_type(char *s);
|
COST_TYPE get_cost_type(char *s);
|
||||||
char *get_cost_string(COST_TYPE a);
|
char *get_cost_string(COST_TYPE a);
|
||||||
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
|
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
|
||||||
@ -17,4 +20,7 @@ void forward_cost_layer_gpu(cost_layer l, network_state state);
|
|||||||
void backward_cost_layer_gpu(const cost_layer l, network_state state);
|
void backward_cost_layer_gpu(const cost_layer l, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -9,7 +9,7 @@ void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
int i,j,k;
|
int i,j,k;
|
||||||
for(i = 0; i < M; ++i){
|
for(i = 0; i < M; ++i){
|
||||||
for(k = 0; k < K; ++k){
|
for(k = 0; k < K; ++k){
|
||||||
register float A_PART = ALPHA*A[i*lda+k];
|
float A_PART = ALPHA * A[i * lda + k];
|
||||||
for(j = 0; j < N; ++j){
|
for(j = 0; j < N; ++j){
|
||||||
C[i*ldc+j] += A_PART*B[k*ldb+j];
|
C[i*ldc+j] += A_PART*B[k*ldb+j];
|
||||||
}
|
}
|
||||||
@ -26,7 +26,7 @@ void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
int i,j,k;
|
int i,j,k;
|
||||||
for(i = 0; i < M; ++i){
|
for(i = 0; i < M; ++i){
|
||||||
for(j = 0; j < N; ++j){
|
for(j = 0; j < N; ++j){
|
||||||
register float sum = 0;
|
float sum = 0;
|
||||||
for(k = 0; k < K; ++k){
|
for(k = 0; k < K; ++k){
|
||||||
sum += ALPHA*A[i*lda+k]*B[k+j*ldb];
|
sum += ALPHA*A[i*lda+k]*B[k+j*ldb];
|
||||||
}
|
}
|
||||||
@ -44,7 +44,7 @@ void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
int i,j,k;
|
int i,j,k;
|
||||||
for(i = 0; i < M; ++i){
|
for(i = 0; i < M; ++i){
|
||||||
for(k = 0; k < K; ++k){
|
for(k = 0; k < K; ++k){
|
||||||
register float A_PART = ALPHA*A[k*lda+i];
|
float A_PART = ALPHA * A[k * lda + i];
|
||||||
for(j = 0; j < N; ++j){
|
for(j = 0; j < N; ++j){
|
||||||
C[i*ldc+j] += A_PART*B[k*ldb+j];
|
C[i*ldc+j] += A_PART*B[k*ldb+j];
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,7 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
|
|||||||
{
|
{
|
||||||
fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
|
fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
|
||||||
batch = batch / steps;
|
batch = batch / steps;
|
||||||
layer l = {0};
|
layer l = { (LAYER_TYPE)0 };
|
||||||
l.batch = batch;
|
l.batch = batch;
|
||||||
l.type = CRNN;
|
l.type = CRNN;
|
||||||
l.steps = steps;
|
l.steps = steps;
|
||||||
@ -44,22 +44,19 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
|
|||||||
l.hidden = h * w * hidden_filters;
|
l.hidden = h * w * hidden_filters;
|
||||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||||
|
|
||||||
l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
|
l.state = (float*)calloc(l.hidden * batch * (steps + 1), sizeof(float));
|
||||||
|
|
||||||
l.input_layer = malloc(sizeof(layer));
|
l.input_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "");
|
|
||||||
*(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
*(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||||
l.input_layer->batch = batch;
|
l.input_layer->batch = batch;
|
||||||
if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
|
if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
|
||||||
|
|
||||||
l.self_layer = malloc(sizeof(layer));
|
l.self_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "");
|
|
||||||
*(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
*(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||||
l.self_layer->batch = batch;
|
l.self_layer->batch = batch;
|
||||||
if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
|
if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
|
||||||
|
|
||||||
l.output_layer = malloc(sizeof(layer));
|
l.output_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "");
|
|
||||||
*(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
*(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
|
||||||
l.output_layer->batch = batch;
|
l.output_layer->batch = batch;
|
||||||
if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
|
if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
|
||||||
|
@ -6,6 +6,9 @@
|
|||||||
#include "layer.h"
|
#include "layer.h"
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
|
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
|
||||||
|
|
||||||
void forward_crnn_layer(layer l, network_state state);
|
void forward_crnn_layer(layer l, network_state state);
|
||||||
@ -20,5 +23,8 @@ void push_crnn_layer(layer l);
|
|||||||
void pull_crnn_layer(layer l);
|
void pull_crnn_layer(layer l);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -16,7 +16,7 @@ void backward_crop_layer_gpu(const crop_layer l, network_state state){}
|
|||||||
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
|
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
|
fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
|
||||||
crop_layer l = {0};
|
crop_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = CROP;
|
l.type = CROP;
|
||||||
l.batch = batch;
|
l.batch = batch;
|
||||||
l.h = h;
|
l.h = h;
|
||||||
@ -32,7 +32,7 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int
|
|||||||
l.out_c = c;
|
l.out_c = c;
|
||||||
l.inputs = l.w * l.h * l.c;
|
l.inputs = l.w * l.h * l.c;
|
||||||
l.outputs = l.out_w * l.out_h * l.out_c;
|
l.outputs = l.out_w * l.out_h * l.out_c;
|
||||||
l.output = calloc(l.outputs*batch, sizeof(float));
|
l.output = (float*)calloc(l.outputs * batch, sizeof(float));
|
||||||
l.forward = forward_crop_layer;
|
l.forward = forward_crop_layer;
|
||||||
l.backward = backward_crop_layer;
|
l.backward = backward_crop_layer;
|
||||||
|
|
||||||
@ -56,7 +56,7 @@ void resize_crop_layer(layer *l, int w, int h)
|
|||||||
l->inputs = l->w * l->h * l->c;
|
l->inputs = l->w * l->h * l->c;
|
||||||
l->outputs = l->out_h * l->out_w * l->out_c;
|
l->outputs = l->out_h * l->out_w * l->out_c;
|
||||||
|
|
||||||
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
|
l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
cuda_free(l->output_gpu);
|
cuda_free(l->output_gpu);
|
||||||
l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
|
l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
|
||||||
|
@ -7,6 +7,9 @@
|
|||||||
|
|
||||||
typedef layer crop_layer;
|
typedef layer crop_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
image get_crop_image(crop_layer l);
|
image get_crop_image(crop_layer l);
|
||||||
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
|
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
|
||||||
void forward_crop_layer(const crop_layer l, network_state state);
|
void forward_crop_layer(const crop_layer l, network_state state);
|
||||||
@ -16,5 +19,8 @@ void resize_crop_layer(layer *l, int w, int h);
|
|||||||
void forward_crop_layer_gpu(crop_layer l, network_state state);
|
void forward_crop_layer_gpu(crop_layer l, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -2,12 +2,10 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "crop_layer.h"
|
#include "crop_layer.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
}
|
|
||||||
|
|
||||||
__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
|
__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
|
||||||
{
|
{
|
||||||
|
14
src/cuda.c
14
src/cuda.c
@ -1,4 +1,10 @@
|
|||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
int gpu_index = 0;
|
int gpu_index = 0;
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif // __cplusplus
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
|
||||||
@ -71,7 +77,7 @@ dim3 cuda_gridsize(size_t n){
|
|||||||
x = ceil(sqrt(k));
|
x = ceil(sqrt(k));
|
||||||
y = (n-1)/(x*BLOCK) + 1;
|
y = (n-1)/(x*BLOCK) + 1;
|
||||||
}
|
}
|
||||||
dim3 d = {x, y, 1};
|
dim3 d = { (unsigned int)x, (unsigned int)y, 1 };
|
||||||
//printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
|
//printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
@ -188,7 +194,7 @@ cublasHandle_t blas_handle()
|
|||||||
if(!init[i]) {
|
if(!init[i]) {
|
||||||
cublasCreate(&handle[i]);
|
cublasCreate(&handle[i]);
|
||||||
cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream());
|
cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream());
|
||||||
CHECK_CUDA(status);
|
CHECK_CUDA((cudaError_t)status);
|
||||||
init[i] = 1;
|
init[i] = 1;
|
||||||
}
|
}
|
||||||
return handle[i];
|
return handle[i];
|
||||||
@ -226,7 +232,7 @@ void cuda_random(float *x_gpu, size_t n)
|
|||||||
|
|
||||||
float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
|
float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
|
||||||
{
|
{
|
||||||
float *tmp = calloc(n, sizeof(float));
|
float* tmp = (float*)calloc(n, sizeof(float));
|
||||||
cuda_pull_array(x_gpu, tmp, n);
|
cuda_pull_array(x_gpu, tmp, n);
|
||||||
//int i;
|
//int i;
|
||||||
//for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
|
//for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
|
||||||
@ -310,6 +316,6 @@ int get_gpu_compute_capability(int i)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#else // GPU
|
#else // GPU
|
||||||
#include "cuda.h"
|
#include "darknet.h"
|
||||||
void cuda_set_device(int n) {}
|
void cuda_set_device(int n) {}
|
||||||
#endif // GPU
|
#endif // GPU
|
||||||
|
31
src/cuda.h
31
src/cuda.h
@ -1,25 +1,27 @@
|
|||||||
#ifndef CUDA_H
|
#ifndef DARKCUDA_H
|
||||||
#define CUDA_H
|
#define DARKCUDA_H
|
||||||
#include "darknet.h"
|
#include "darknet.h"
|
||||||
|
|
||||||
#if defined(_MSC_VER) && _MSC_VER < 1900
|
#ifdef __cplusplus
|
||||||
#define inline __inline
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern int gpu_index;
|
extern int gpu_index;
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif // __cplusplus
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
|
||||||
#define BLOCK 512
|
|
||||||
|
|
||||||
#include "cuda_runtime.h"
|
#include <cuda_runtime.h>
|
||||||
#include "curand.h"
|
#include <curand.h>
|
||||||
#include "cublas_v2.h"
|
#include <cublas_v2.h>
|
||||||
#include "cuda_runtime_api.h"
|
#include <cuda_runtime_api.h>
|
||||||
//#include "driver_types.h"
|
#include <driver_types.h>
|
||||||
|
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
#include "cudnn.h"
|
#include <cudnn.h>
|
||||||
#endif // CUDNN
|
#endif // CUDNN
|
||||||
|
|
||||||
#ifndef __DATE__
|
#ifndef __DATE__
|
||||||
@ -65,9 +67,6 @@ extern "C" {
|
|||||||
cudaStream_t get_cuda_memcpy_stream();
|
cudaStream_t get_cuda_memcpy_stream();
|
||||||
int get_number_of_blocks(int array_size, int block_size);
|
int get_number_of_blocks(int array_size, int block_size);
|
||||||
int get_gpu_compute_capability(int i);
|
int get_gpu_compute_capability(int i);
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif // __cplusplus
|
|
||||||
|
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
cudnnHandle_t cudnn_handle();
|
cudnnHandle_t cudnn_handle();
|
||||||
@ -77,6 +76,10 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line
|
|||||||
#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );
|
#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif // __cplusplus
|
||||||
|
|
||||||
#else // GPU
|
#else // GPU
|
||||||
//LIB_API void cuda_set_device(int n);
|
//LIB_API void cuda_set_device(int n);
|
||||||
#endif // GPU
|
#endif // GPU
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
#include "connected_layer.h"
|
#include "connected_layer.h"
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
|
extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
|
||||||
@ -258,12 +258,12 @@ layer normalize_layer(layer l, int n)
|
|||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
l.batch_normalize=1;
|
l.batch_normalize=1;
|
||||||
l.scales = calloc(n, sizeof(float));
|
l.scales = (float*)calloc(n, sizeof(float));
|
||||||
for(j = 0; j < n; ++j){
|
for(j = 0; j < n; ++j){
|
||||||
l.scales[j] = 1;
|
l.scales[j] = 1;
|
||||||
}
|
}
|
||||||
l.rolling_mean = calloc(n, sizeof(float));
|
l.rolling_mean = (float*)calloc(n, sizeof(float));
|
||||||
l.rolling_variance = calloc(n, sizeof(float));
|
l.rolling_variance = (float*)calloc(n, sizeof(float));
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#ifdef _WIN32
|
||||||
#ifndef _UNISTD_H
|
#ifndef _UNISTD_H
|
||||||
#define _UNISTD_H 1
|
#define _UNISTD_H 1
|
||||||
|
|
||||||
@ -6,12 +7,13 @@
|
|||||||
* Please add functionality as needed
|
* Please add functionality as needed
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <Winsock2.h>
|
||||||
|
#include <direct.h> /* for _getcwd() and _chdir() */
|
||||||
|
#include <getopt.h>
|
||||||
#include <io.h>
|
#include <io.h>
|
||||||
#include <process.h> /* for getpid() and the exec..() family */
|
#include <process.h> /* for getpid() and the exec..() family */
|
||||||
#include <direct.h> /* for _getcwd() and _chdir() */
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "getopt.h" /* getopt at: https://gist.github.com/ashelly/7776712 */
|
|
||||||
#define srandom srand
|
#define srandom srand
|
||||||
#define random rand
|
#define random rand
|
||||||
|
|
||||||
@ -19,7 +21,7 @@
|
|||||||
These may be OR'd together. */
|
These may be OR'd together. */
|
||||||
#define R_OK 4 /* Test for read permission. */
|
#define R_OK 4 /* Test for read permission. */
|
||||||
#define W_OK 2 /* Test for write permission. */
|
#define W_OK 2 /* Test for write permission. */
|
||||||
//#define X_OK 1 /* execute permission - unsupported in windows*/
|
#define X_OK R_OK /* execute permission - unsupported in Windows, \
|
||||||
#define F_OK 0 /* Test for existence. */
|
#define F_OK 0 /* Test for existence. */
|
||||||
|
|
||||||
#define access _access
|
#define access _access
|
||||||
@ -48,5 +50,7 @@ These may be OR'd together. */
|
|||||||
//typedef unsigned __int16 uint16_t;
|
//typedef unsigned __int16 uint16_t;
|
||||||
//typedef unsigned __int32 uint32_t;
|
//typedef unsigned __int32 uint32_t;
|
||||||
//typedef unsigned __int64 uint64_t;
|
//typedef unsigned __int64 uint64_t;
|
||||||
|
#endif /* _UNISTD_H */
|
||||||
#endif /* unistd.h */
|
#else
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif /* _WIN32 */
|
71
src/data.c
71
src/data.c
@ -41,7 +41,7 @@ char **get_random_paths_indexes(char **paths, int n, int m, int *indexes)
|
|||||||
|
|
||||||
char **get_random_paths(char **paths, int n, int m)
|
char **get_random_paths(char **paths, int n, int m)
|
||||||
{
|
{
|
||||||
char **random_paths = calloc(n, sizeof(char*));
|
char** random_paths = (char**)calloc(n, sizeof(char*));
|
||||||
int i;
|
int i;
|
||||||
pthread_mutex_lock(&mutex);
|
pthread_mutex_lock(&mutex);
|
||||||
//printf("n = %d \n", n);
|
//printf("n = %d \n", n);
|
||||||
@ -60,7 +60,7 @@ char **get_random_paths(char **paths, int n, int m)
|
|||||||
|
|
||||||
char **find_replace_paths(char **paths, int n, char *find, char *replace)
|
char **find_replace_paths(char **paths, int n, char *find, char *replace)
|
||||||
{
|
{
|
||||||
char **replace_paths = calloc(n, sizeof(char*));
|
char** replace_paths = (char**)calloc(n, sizeof(char*));
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
char replaced[4096];
|
char replaced[4096];
|
||||||
@ -75,7 +75,7 @@ matrix load_image_paths_gray(char **paths, int n, int w, int h)
|
|||||||
int i;
|
int i;
|
||||||
matrix X;
|
matrix X;
|
||||||
X.rows = n;
|
X.rows = n;
|
||||||
X.vals = calloc(X.rows, sizeof(float*));
|
X.vals = (float**)calloc(X.rows, sizeof(float*));
|
||||||
X.cols = 0;
|
X.cols = 0;
|
||||||
|
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
@ -96,7 +96,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
|
|||||||
int i;
|
int i;
|
||||||
matrix X;
|
matrix X;
|
||||||
X.rows = n;
|
X.rows = n;
|
||||||
X.vals = calloc(X.rows, sizeof(float*));
|
X.vals = (float**)calloc(X.rows, sizeof(float*));
|
||||||
X.cols = 0;
|
X.cols = 0;
|
||||||
|
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
@ -112,7 +112,7 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int
|
|||||||
int i;
|
int i;
|
||||||
matrix X;
|
matrix X;
|
||||||
X.rows = n;
|
X.rows = n;
|
||||||
X.vals = calloc(X.rows, sizeof(float*));
|
X.vals = (float**)calloc(X.rows, sizeof(float*));
|
||||||
X.cols = 0;
|
X.cols = 0;
|
||||||
|
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
@ -139,7 +139,7 @@ extern int check_mistakes;
|
|||||||
|
|
||||||
box_label *read_boxes(char *filename, int *n)
|
box_label *read_boxes(char *filename, int *n)
|
||||||
{
|
{
|
||||||
box_label *boxes = calloc(1, sizeof(box_label));
|
box_label* boxes = (box_label*)calloc(1, sizeof(box_label));
|
||||||
FILE *file = fopen(filename, "r");
|
FILE *file = fopen(filename, "r");
|
||||||
if (!file) {
|
if (!file) {
|
||||||
printf("Can't open label file. (This can be normal only if you use MSCOCO): %s \n", filename);
|
printf("Can't open label file. (This can be normal only if you use MSCOCO): %s \n", filename);
|
||||||
@ -158,7 +158,7 @@ box_label *read_boxes(char *filename, int *n)
|
|||||||
int id;
|
int id;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
|
while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
|
||||||
boxes = realloc(boxes, (count+1)*sizeof(box_label));
|
boxes = (box_label*)realloc(boxes, (count + 1) * sizeof(box_label));
|
||||||
boxes[count].id = id;
|
boxes[count].id = id;
|
||||||
boxes[count].x = x;
|
boxes[count].x = x;
|
||||||
boxes[count].y = y;
|
boxes[count].y = y;
|
||||||
@ -300,7 +300,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
|
|||||||
free(boxes);
|
free(boxes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
|
void fill_truth_detection(const char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
|
||||||
int small_object, int net_w, int net_h)
|
int small_object, int net_w, int net_h)
|
||||||
{
|
{
|
||||||
char labelpath[4096];
|
char labelpath[4096];
|
||||||
@ -391,7 +391,6 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
|
|||||||
free(boxes);
|
free(boxes);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define NUMCHARS 37
|
|
||||||
|
|
||||||
void print_letters(float *pred, int n)
|
void print_letters(float *pred, int n)
|
||||||
{
|
{
|
||||||
@ -565,7 +564,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
|
|||||||
d.shallow = 0;
|
d.shallow = 0;
|
||||||
|
|
||||||
d.X.rows = n;
|
d.X.rows = n;
|
||||||
d.X.vals = calloc(d.X.rows, sizeof(float*));
|
d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
|
||||||
d.X.cols = h*w*3;
|
d.X.cols = h*w*3;
|
||||||
|
|
||||||
|
|
||||||
@ -619,7 +618,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
|
|||||||
d.shallow = 0;
|
d.shallow = 0;
|
||||||
|
|
||||||
d.X.rows = n;
|
d.X.rows = n;
|
||||||
d.X.vals = calloc(d.X.rows, sizeof(float*));
|
d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
|
||||||
d.X.cols = h*w*6;
|
d.X.cols = h*w*6;
|
||||||
|
|
||||||
int k = 2*(classes);
|
int k = 2*(classes);
|
||||||
@ -628,7 +627,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
|
|||||||
image im1 = load_image_color(paths[i*2], w, h);
|
image im1 = load_image_color(paths[i*2], w, h);
|
||||||
image im2 = load_image_color(paths[i*2+1], w, h);
|
image im2 = load_image_color(paths[i*2+1], w, h);
|
||||||
|
|
||||||
d.X.vals[i] = calloc(d.X.cols, sizeof(float));
|
d.X.vals[i] = (float*)calloc(d.X.cols, sizeof(float));
|
||||||
memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float));
|
memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float));
|
||||||
memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float));
|
memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float));
|
||||||
|
|
||||||
@ -690,7 +689,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
|
|||||||
d.h = h;
|
d.h = h;
|
||||||
|
|
||||||
d.X.rows = 1;
|
d.X.rows = 1;
|
||||||
d.X.vals = calloc(d.X.rows, sizeof(float*));
|
d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
|
||||||
d.X.cols = h*w*3;
|
d.X.cols = h*w*3;
|
||||||
|
|
||||||
int k = (4+classes)*30;
|
int k = (4+classes)*30;
|
||||||
@ -729,12 +728,12 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#include "opencv2/imgproc/imgproc_c.h"
|
#include <opencv2/imgproc/imgproc_c.h>
|
||||||
#include "opencv2/core/version.hpp"
|
#include <opencv2/core/version.hpp>
|
||||||
#ifndef CV_VERSION_EPOCH
|
#ifndef CV_VERSION_EPOCH
|
||||||
#include "opencv2/videoio/videoio_c.h"
|
#include <opencv2/videoio/videoio_c.h>
|
||||||
#include "opencv2/imgcodecs/imgcodecs_c.h"
|
#include <opencv2/imgcodecs/imgcodecs_c.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
@ -748,7 +747,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
|
|||||||
d.shallow = 0;
|
d.shallow = 0;
|
||||||
|
|
||||||
d.X.rows = n;
|
d.X.rows = n;
|
||||||
d.X.vals = calloc(d.X.rows, sizeof(float*));
|
d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
|
||||||
d.X.cols = h*w*c;
|
d.X.cols = h*w*c;
|
||||||
|
|
||||||
d.y = make_matrix(n, 5*boxes);
|
d.y = make_matrix(n, 5*boxes);
|
||||||
@ -817,7 +816,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
|
|||||||
d.shallow = 0;
|
d.shallow = 0;
|
||||||
|
|
||||||
d.X.rows = n;
|
d.X.rows = n;
|
||||||
d.X.vals = calloc(d.X.rows, sizeof(float*));
|
d.X.vals = (float**)calloc(d.X.rows, sizeof(float*));
|
||||||
d.X.cols = h*w*c;
|
d.X.cols = h*w*c;
|
||||||
|
|
||||||
d.y = make_matrix(n, 5 * boxes);
|
d.y = make_matrix(n, 5 * boxes);
|
||||||
@ -903,7 +902,7 @@ void *load_thread(void *ptr)
|
|||||||
pthread_t load_data_in_thread(load_args args)
|
pthread_t load_data_in_thread(load_args args)
|
||||||
{
|
{
|
||||||
pthread_t thread;
|
pthread_t thread;
|
||||||
struct load_args *ptr = calloc(1, sizeof(struct load_args));
|
struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
|
||||||
*ptr = args;
|
*ptr = args;
|
||||||
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed");
|
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed");
|
||||||
return thread;
|
return thread;
|
||||||
@ -918,8 +917,8 @@ void *load_threads(void *ptr)
|
|||||||
data *out = args.d;
|
data *out = args.d;
|
||||||
int total = args.n;
|
int total = args.n;
|
||||||
free(ptr);
|
free(ptr);
|
||||||
data *buffers = calloc(args.threads, sizeof(data));
|
data* buffers = (data*)calloc(args.threads, sizeof(data));
|
||||||
pthread_t *threads = calloc(args.threads, sizeof(pthread_t));
|
pthread_t* threads = (pthread_t*)calloc(args.threads, sizeof(pthread_t));
|
||||||
for(i = 0; i < args.threads; ++i){
|
for(i = 0; i < args.threads; ++i){
|
||||||
args.d = buffers + i;
|
args.d = buffers + i;
|
||||||
args.n = (i+1) * total/args.threads - i * total/args.threads;
|
args.n = (i+1) * total/args.threads - i * total/args.threads;
|
||||||
@ -942,7 +941,7 @@ void *load_threads(void *ptr)
|
|||||||
pthread_t load_data(load_args args)
|
pthread_t load_data(load_args args)
|
||||||
{
|
{
|
||||||
pthread_t thread;
|
pthread_t thread;
|
||||||
struct load_args *ptr = calloc(1, sizeof(struct load_args));
|
struct load_args* ptr = (load_args*)calloc(1, sizeof(struct load_args));
|
||||||
*ptr = args;
|
*ptr = args;
|
||||||
if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed");
|
if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed");
|
||||||
return thread;
|
return thread;
|
||||||
@ -996,11 +995,11 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
|||||||
|
|
||||||
int i;
|
int i;
|
||||||
d.X.rows = n;
|
d.X.rows = n;
|
||||||
d.X.vals = calloc(n, sizeof(float*));
|
d.X.vals = (float**)calloc(n, sizeof(float*));
|
||||||
d.X.cols = w*h*3;
|
d.X.cols = w*h*3;
|
||||||
|
|
||||||
d.y.rows = n;
|
d.y.rows = n;
|
||||||
d.y.vals = calloc(n, sizeof(float*));
|
d.y.vals = (float**)calloc(n, sizeof(float*));
|
||||||
d.y.cols = w*scale * h*scale * 3;
|
d.y.cols = w*scale * h*scale * 3;
|
||||||
|
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
@ -1048,7 +1047,7 @@ matrix concat_matrix(matrix m1, matrix m2)
|
|||||||
matrix m;
|
matrix m;
|
||||||
m.cols = m1.cols;
|
m.cols = m1.cols;
|
||||||
m.rows = m1.rows+m2.rows;
|
m.rows = m1.rows+m2.rows;
|
||||||
m.vals = calloc(m1.rows + m2.rows, sizeof(float*));
|
m.vals = (float**)calloc(m1.rows + m2.rows, sizeof(float*));
|
||||||
for(i = 0; i < m1.rows; ++i){
|
for(i = 0; i < m1.rows; ++i){
|
||||||
m.vals[count++] = m1.vals[i];
|
m.vals[count++] = m1.vals[i];
|
||||||
}
|
}
|
||||||
@ -1072,9 +1071,9 @@ data concat_datas(data *d, int n)
|
|||||||
int i;
|
int i;
|
||||||
data out = {0};
|
data out = {0};
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
data new = concat_data(d[i], out);
|
data newdata = concat_data(d[i], out);
|
||||||
free_data(out);
|
free_data(out);
|
||||||
out = new;
|
out = newdata;
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
@ -1300,8 +1299,8 @@ data get_random_data(data d, int num)
|
|||||||
r.X.cols = d.X.cols;
|
r.X.cols = d.X.cols;
|
||||||
r.y.cols = d.y.cols;
|
r.y.cols = d.y.cols;
|
||||||
|
|
||||||
r.X.vals = calloc(num, sizeof(float *));
|
r.X.vals = (float**)calloc(num, sizeof(float*));
|
||||||
r.y.vals = calloc(num, sizeof(float *));
|
r.y.vals = (float**)calloc(num, sizeof(float*));
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < num; ++i){
|
for(i = 0; i < num; ++i){
|
||||||
@ -1314,7 +1313,7 @@ data get_random_data(data d, int num)
|
|||||||
|
|
||||||
data *split_data(data d, int part, int total)
|
data *split_data(data d, int part, int total)
|
||||||
{
|
{
|
||||||
data *split = calloc(2, sizeof(data));
|
data* split = (data*)calloc(2, sizeof(data));
|
||||||
int i;
|
int i;
|
||||||
int start = part*d.X.rows/total;
|
int start = part*d.X.rows/total;
|
||||||
int end = (part+1)*d.X.rows/total;
|
int end = (part+1)*d.X.rows/total;
|
||||||
@ -1327,10 +1326,10 @@ data *split_data(data d, int part, int total)
|
|||||||
train.X.cols = test.X.cols = d.X.cols;
|
train.X.cols = test.X.cols = d.X.cols;
|
||||||
train.y.cols = test.y.cols = d.y.cols;
|
train.y.cols = test.y.cols = d.y.cols;
|
||||||
|
|
||||||
train.X.vals = calloc(train.X.rows, sizeof(float*));
|
train.X.vals = (float**)calloc(train.X.rows, sizeof(float*));
|
||||||
test.X.vals = calloc(test.X.rows, sizeof(float*));
|
test.X.vals = (float**)calloc(test.X.rows, sizeof(float*));
|
||||||
train.y.vals = calloc(train.y.rows, sizeof(float*));
|
train.y.vals = (float**)calloc(train.y.rows, sizeof(float*));
|
||||||
test.y.vals = calloc(test.y.rows, sizeof(float*));
|
test.y.vals = (float**)calloc(test.y.rows, sizeof(float*));
|
||||||
|
|
||||||
for(i = 0; i < start; ++i){
|
for(i = 0; i < start; ++i){
|
||||||
train.X.vals[i] = d.X.vals[i];
|
train.X.vals[i] = d.X.vals[i];
|
||||||
|
11
src/data.h
11
src/data.h
@ -2,14 +2,14 @@
|
|||||||
#define DATA_H
|
#define DATA_H
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
|
||||||
#if defined(_MSC_VER) && _MSC_VER < 1900
|
#include "darknet.h"
|
||||||
#define inline __inline
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "darknet.h"
|
#include "darknet.h"
|
||||||
#include "matrix.h"
|
#include "matrix.h"
|
||||||
#include "list.h"
|
#include "list.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
#include "tree.h"
|
#include "tree.h"
|
||||||
|
|
||||||
static inline float distance_from_edge(int x, int max)
|
static inline float distance_from_edge(int x, int max)
|
||||||
@ -115,5 +115,8 @@ data *split_data(data d, int part, int total);
|
|||||||
data concat_data(data d1, data d2);
|
data concat_data(data d1, data d2);
|
||||||
data concat_datas(data *d, int n);
|
data concat_datas(data *d, int n);
|
||||||
void fill_truth(char *path, char **labels, int k, float *truth);
|
void fill_truth(char *path, char **labels, int k, float *truth);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "convolutional_layer.h"
|
#include "convolutional_layer.h"
|
||||||
#include "deconvolutional_layer.h"
|
#include "deconvolutional_layer.h"
|
||||||
#include "gemm.h"
|
#include "gemm.h"
|
||||||
@ -11,7 +10,6 @@ extern "C" {
|
|||||||
#include "col2im.h"
|
#include "col2im.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
@ -95,7 +93,7 @@ extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
|
|||||||
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
|
extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
int size = layer.size*layer.size*layer.c*layer.n;
|
int size = layer.size*layer.size*layer.c*layer.n;
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ image get_deconvolutional_delta(deconvolutional_layer l)
|
|||||||
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
|
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
deconvolutional_layer l = {0};
|
deconvolutional_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = DECONVOLUTIONAL;
|
l.type = DECONVOLUTIONAL;
|
||||||
|
|
||||||
l.h = h;
|
l.h = h;
|
||||||
@ -57,11 +57,11 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
|
|||||||
l.stride = stride;
|
l.stride = stride;
|
||||||
l.size = size;
|
l.size = size;
|
||||||
|
|
||||||
l.weights = calloc(c*n*size*size, sizeof(float));
|
l.weights = (float*)calloc(c * n * size * size, sizeof(float));
|
||||||
l.weight_updates = calloc(c*n*size*size, sizeof(float));
|
l.weight_updates = (float*)calloc(c * n * size * size, sizeof(float));
|
||||||
|
|
||||||
l.biases = calloc(n, sizeof(float));
|
l.biases = (float*)calloc(n, sizeof(float));
|
||||||
l.bias_updates = calloc(n, sizeof(float));
|
l.bias_updates = (float*)calloc(n, sizeof(float));
|
||||||
float scale = 1./sqrt(size*size*c);
|
float scale = 1./sqrt(size*size*c);
|
||||||
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
|
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
@ -76,9 +76,9 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
|
|||||||
l.outputs = l.out_w * l.out_h * l.out_c;
|
l.outputs = l.out_w * l.out_h * l.out_c;
|
||||||
l.inputs = l.w * l.h * l.c;
|
l.inputs = l.w * l.h * l.c;
|
||||||
|
|
||||||
l.col_image = calloc(h*w*size*size*n, sizeof(float));
|
l.col_image = (float*)calloc(h * w * size * size * n, sizeof(float));
|
||||||
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
|
||||||
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_deconvolutional_layer;
|
l.forward = forward_deconvolutional_layer;
|
||||||
l.backward = backward_deconvolutional_layer;
|
l.backward = backward_deconvolutional_layer;
|
||||||
@ -110,11 +110,11 @@ void resize_deconvolutional_layer(deconvolutional_layer *l, int h, int w)
|
|||||||
int out_h = deconvolutional_out_height(*l);
|
int out_h = deconvolutional_out_height(*l);
|
||||||
int out_w = deconvolutional_out_width(*l);
|
int out_w = deconvolutional_out_width(*l);
|
||||||
|
|
||||||
l->col_image = realloc(l->col_image,
|
l->col_image = (float*)realloc(l->col_image,
|
||||||
out_h*out_w*l->size*l->size*l->c*sizeof(float));
|
out_h*out_w*l->size*l->size*l->c*sizeof(float));
|
||||||
l->output = realloc(l->output,
|
l->output = (float*)realloc(l->output,
|
||||||
l->batch*out_h * out_w * l->n*sizeof(float));
|
l->batch*out_h * out_w * l->n*sizeof(float));
|
||||||
l->delta = realloc(l->delta,
|
l->delta = (float*)realloc(l->delta,
|
||||||
l->batch*out_h * out_w * l->n*sizeof(float));
|
l->batch*out_h * out_w * l->n*sizeof(float));
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
cuda_free(l->col_image_gpu);
|
cuda_free(l->col_image_gpu);
|
||||||
@ -191,7 +191,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_deconvolutional_layer(deconvolutional_layer l, float learning_rate, float momentum, float decay)
|
void update_deconvolutional_layer(deconvolutional_layer l, int skip, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
int size = l.size*l.size*l.c*l.n;
|
int size = l.size*l.size*l.c*l.n;
|
||||||
axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1);
|
axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1);
|
||||||
|
@ -9,10 +9,13 @@
|
|||||||
|
|
||||||
typedef layer deconvolutional_layer;
|
typedef layer deconvolutional_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
|
void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
|
||||||
void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
|
void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
|
||||||
void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
|
void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay);
|
||||||
void push_deconvolutional_layer(deconvolutional_layer layer);
|
void push_deconvolutional_layer(deconvolutional_layer layer);
|
||||||
void pull_deconvolutional_layer(deconvolutional_layer layer);
|
void pull_deconvolutional_layer(deconvolutional_layer layer);
|
||||||
#endif
|
#endif
|
||||||
@ -20,7 +23,7 @@ void pull_deconvolutional_layer(deconvolutional_layer layer);
|
|||||||
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation);
|
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation);
|
||||||
void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
|
void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
|
||||||
void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state);
|
void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state);
|
||||||
void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
|
void update_deconvolutional_layer(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay);
|
||||||
void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state);
|
void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state);
|
||||||
|
|
||||||
image get_deconvolutional_image(deconvolutional_layer layer);
|
image get_deconvolutional_image(deconvolutional_layer layer);
|
||||||
@ -30,5 +33,8 @@ image get_deconvolutional_filter(deconvolutional_layer layer, int i);
|
|||||||
int deconvolutional_out_height(deconvolutional_layer layer);
|
int deconvolutional_out_height(deconvolutional_layer layer);
|
||||||
int deconvolutional_out_width(deconvolutional_layer layer);
|
int deconvolutional_out_width(deconvolutional_layer layer);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
43
src/demo.c
43
src/demo.c
@ -9,20 +9,18 @@
|
|||||||
#include "demo.h"
|
#include "demo.h"
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <winsock.h>
|
|
||||||
#include "gettimeofday.h"
|
#include "gettimeofday.h"
|
||||||
#else
|
#else
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define FRAMES 3
|
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#include "opencv2/imgproc/imgproc_c.h"
|
#include <opencv2/imgproc/imgproc_c.h>
|
||||||
#include "opencv2/core/version.hpp"
|
#include <opencv2/core/version.hpp>
|
||||||
#ifndef CV_VERSION_EPOCH
|
#ifndef CV_VERSION_EPOCH
|
||||||
#include "opencv2/videoio/videoio_c.h"
|
#include <opencv2/videoio/videoio_c.h>
|
||||||
#endif
|
#endif
|
||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
image get_image_from_stream(CvCapture *cap);
|
image get_image_from_stream(CvCapture *cap);
|
||||||
@ -45,10 +43,10 @@ static int demo_ext_output = 0;
|
|||||||
static long long int frame_id = 0;
|
static long long int frame_id = 0;
|
||||||
static int demo_json_port = -1;
|
static int demo_json_port = -1;
|
||||||
|
|
||||||
static float *predictions[FRAMES];
|
static float* predictions[NFRAMES];
|
||||||
static int demo_index = 0;
|
static int demo_index = 0;
|
||||||
static image images[FRAMES];
|
static image images[NFRAMES];
|
||||||
static IplImage* ipl_images[FRAMES];
|
static IplImage* ipl_images[NFRAMES];
|
||||||
static float *avg;
|
static float *avg;
|
||||||
|
|
||||||
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output);
|
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output);
|
||||||
@ -77,7 +75,7 @@ void *fetch_in_thread(void *ptr)
|
|||||||
//error("Stream closed.");
|
//error("Stream closed.");
|
||||||
printf("Stream closed.\n");
|
printf("Stream closed.\n");
|
||||||
flag_exit = 1;
|
flag_exit = 1;
|
||||||
return EXIT_FAILURE;
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
//in_s = resize_image(in, net.w, net.h);
|
//in_s = resize_image(in, net.w, net.h);
|
||||||
|
|
||||||
@ -91,14 +89,14 @@ void *detect_in_thread(void *ptr)
|
|||||||
float *prediction = network_predict(net, X);
|
float *prediction = network_predict(net, X);
|
||||||
|
|
||||||
memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
|
memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
|
||||||
mean_arrays(predictions, FRAMES, l.outputs, avg);
|
mean_arrays(predictions, NFRAMES, l.outputs, avg);
|
||||||
l.output = avg;
|
l.output = avg;
|
||||||
|
|
||||||
free_image(det_s);
|
free_image(det_s);
|
||||||
|
|
||||||
ipl_images[demo_index] = det_img;
|
ipl_images[demo_index] = det_img;
|
||||||
det_img = ipl_images[(demo_index + FRAMES / 2 + 1) % FRAMES];
|
det_img = ipl_images[(demo_index + NFRAMES / 2 + 1) % NFRAMES];
|
||||||
demo_index = (demo_index + 1) % FRAMES;
|
demo_index = (demo_index + 1) % NFRAMES;
|
||||||
|
|
||||||
if (letter_box)
|
if (letter_box)
|
||||||
dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box
|
dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box
|
||||||
@ -110,11 +108,11 @@ void *detect_in_thread(void *ptr)
|
|||||||
|
|
||||||
double get_wall_time()
|
double get_wall_time()
|
||||||
{
|
{
|
||||||
struct timeval time;
|
struct timeval walltime;
|
||||||
if (gettimeofday(&time,NULL)){
|
if (gettimeofday(&walltime, NULL)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return (double)time.tv_sec + (double)time.tv_usec * .000001;
|
return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001;
|
||||||
}
|
}
|
||||||
|
|
||||||
void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
|
void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
|
||||||
@ -161,8 +159,8 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
|
|||||||
int j;
|
int j;
|
||||||
|
|
||||||
avg = (float *) calloc(l.outputs, sizeof(float));
|
avg = (float *) calloc(l.outputs, sizeof(float));
|
||||||
for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
|
for(j = 0; j < NFRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
|
||||||
for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3);
|
for(j = 0; j < NFRAMES; ++j) images[j] = make_image(1,1,3);
|
||||||
|
|
||||||
if (l.classes != demo_classes) {
|
if (l.classes != demo_classes) {
|
||||||
printf("Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes);
|
printf("Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes);
|
||||||
@ -185,7 +183,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
|
|||||||
det_img = in_img;
|
det_img = in_img;
|
||||||
det_s = in_s;
|
det_s = in_s;
|
||||||
|
|
||||||
for(j = 0; j < FRAMES/2; ++j){
|
for (j = 0; j < NFRAMES / 2; ++j) {
|
||||||
fetch_in_thread(0);
|
fetch_in_thread(0);
|
||||||
detect_in_thread(0);
|
detect_in_thread(0);
|
||||||
det_img = in_img;
|
det_img = in_img;
|
||||||
@ -318,10 +316,10 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
|
|||||||
free_image(in_s);
|
free_image(in_s);
|
||||||
|
|
||||||
free(avg);
|
free(avg);
|
||||||
for (j = 0; j < FRAMES; ++j) free(predictions[j]);
|
for (j = 0; j < NFRAMES; ++j) free(predictions[j]);
|
||||||
for (j = 0; j < FRAMES; ++j) free_image(images[j]);
|
for (j = 0; j < NFRAMES; ++j) free_image(images[j]);
|
||||||
|
|
||||||
free_ptrs(names, net.layers[net.n - 1].classes);
|
free_ptrs((void **)names, net.layers[net.n - 1].classes);
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
const int nsize = 8;
|
const int nsize = 8;
|
||||||
@ -342,4 +340,3 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
|
|||||||
fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
|
fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
10
src/demo.h
10
src/demo.h
@ -1,8 +1,14 @@
|
|||||||
#ifndef DEMO
|
#ifndef DEMO_H
|
||||||
#define DEMO
|
#define DEMO_H
|
||||||
|
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
|
void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
|
||||||
int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int json_port, int dont_show, int ext_output);
|
int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int json_port, int dont_show, int ext_output);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
|
|
||||||
detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore)
|
detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore)
|
||||||
{
|
{
|
||||||
detection_layer l = {0};
|
detection_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = DETECTION;
|
l.type = DETECTION;
|
||||||
|
|
||||||
l.n = n;
|
l.n = n;
|
||||||
@ -25,11 +25,11 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
|
|||||||
l.w = side;
|
l.w = side;
|
||||||
l.h = side;
|
l.h = side;
|
||||||
assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
|
assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
|
||||||
l.cost = calloc(1, sizeof(float));
|
l.cost = (float*)calloc(1, sizeof(float));
|
||||||
l.outputs = l.inputs;
|
l.outputs = l.inputs;
|
||||||
l.truths = l.side*l.side*(1+l.coords+l.classes);
|
l.truths = l.side*l.side*(1+l.coords+l.classes);
|
||||||
l.output = calloc(batch*l.outputs, sizeof(float));
|
l.output = (float*)calloc(batch * l.outputs, sizeof(float));
|
||||||
l.delta = calloc(batch*l.outputs, sizeof(float));
|
l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_detection_layer;
|
l.forward = forward_detection_layer;
|
||||||
l.backward = backward_detection_layer;
|
l.backward = backward_detection_layer;
|
||||||
@ -182,7 +182,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(0){
|
if(0){
|
||||||
float *costs = calloc(l.batch*locations*l.n, sizeof(float));
|
float* costs = (float*)calloc(l.batch * locations * l.n, sizeof(float));
|
||||||
for (b = 0; b < l.batch; ++b) {
|
for (b = 0; b < l.batch; ++b) {
|
||||||
int index = b*l.inputs;
|
int index = b*l.inputs;
|
||||||
for (i = 0; i < locations; ++i) {
|
for (i = 0; i < locations; ++i) {
|
||||||
@ -259,11 +259,11 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
|
float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
|
||||||
float *truth_cpu = 0;
|
float *truth_cpu = 0;
|
||||||
if(state.truth){
|
if(state.truth){
|
||||||
int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes);
|
int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes);
|
||||||
truth_cpu = calloc(num_truth, sizeof(float));
|
truth_cpu = (float*)calloc(num_truth, sizeof(float));
|
||||||
cuda_pull_array(state.truth, truth_cpu, num_truth);
|
cuda_pull_array(state.truth, truth_cpu, num_truth);
|
||||||
}
|
}
|
||||||
cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);
|
cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);
|
||||||
|
@ -6,6 +6,9 @@
|
|||||||
|
|
||||||
typedef layer detection_layer;
|
typedef layer detection_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
|
detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
|
||||||
void forward_detection_layer(const detection_layer l, network_state state);
|
void forward_detection_layer(const detection_layer l, network_state state);
|
||||||
void backward_detection_layer(const detection_layer l, network_state state);
|
void backward_detection_layer(const detection_layer l, network_state state);
|
||||||
@ -17,4 +20,7 @@ void forward_detection_layer_gpu(const detection_layer l, network_state state);
|
|||||||
void backward_detection_layer_gpu(detection_layer l, network_state state);
|
void backward_detection_layer_gpu(detection_layer l, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -9,27 +9,33 @@
|
|||||||
#include "option_list.h"
|
#include "option_list.h"
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#include "opencv2/core/core_c.h"
|
#include <opencv2/core/core_c.h>
|
||||||
//#include "opencv2/core/core.hpp"
|
//#include "opencv2/core/core.hpp"
|
||||||
#include "opencv2/core/version.hpp"
|
#include <opencv2/core/version.hpp>
|
||||||
#include "opencv2/imgproc/imgproc_c.h"
|
#include <opencv2/imgproc/imgproc_c.h>
|
||||||
|
|
||||||
#ifndef CV_VERSION_EPOCH
|
#ifndef CV_VERSION_EPOCH
|
||||||
#include "opencv2/videoio/videoio_c.h"
|
#include <opencv2/videoio/videoio_c.h>
|
||||||
#define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION)
|
#define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION)
|
||||||
|
#ifndef USE_CMAKE_LIBS
|
||||||
#pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib")
|
#pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib")
|
||||||
|
#endif // USE_CMAKE_LIBS
|
||||||
#else
|
#else
|
||||||
#define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)"" CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)
|
#define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)"" CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)
|
||||||
|
#ifndef USE_CMAKE_LIBS
|
||||||
#pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib")
|
#pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib")
|
||||||
#pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib")
|
#pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib")
|
||||||
#pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib")
|
#pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib")
|
||||||
|
#endif // USE_CMAKE_LIBS
|
||||||
#endif
|
#endif
|
||||||
IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
|
IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
|
||||||
|
|
||||||
void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
|
void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
|
||||||
float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port);
|
float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port);
|
||||||
|
#endif // OPENCV
|
||||||
|
|
||||||
|
#ifndef CV_RGB
|
||||||
#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
|
#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
|
||||||
#endif // OPENCV
|
#endif // CV_RGB
|
||||||
|
|
||||||
@ -81,7 +87,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
network *nets = calloc(ngpus, sizeof(network));
|
network* nets = (network*)calloc(ngpus, sizeof(network));
|
||||||
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
int seed = rand();
|
int seed = rand();
|
||||||
@ -410,8 +416,8 @@ void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int
|
|||||||
if (ymax > h) ymax = h;
|
if (ymax > h) ymax = h;
|
||||||
|
|
||||||
for (j = 0; j < classes; ++j) {
|
for (j = 0; j < classes; ++j) {
|
||||||
int class = j;
|
int myclass = j;
|
||||||
if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[class],
|
if (dets[i].prob[myclass]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[myclass],
|
||||||
xmin, ymin, xmax, ymax);
|
xmin, ymin, xmax, ymax);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -465,7 +471,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (!outfile) outfile = "comp4_det_test_";
|
if (!outfile) outfile = "comp4_det_test_";
|
||||||
fps = calloc(classes, sizeof(FILE *));
|
fps = (FILE**)calloc(classes, sizeof(FILE*));
|
||||||
for (j = 0; j < classes; ++j) {
|
for (j = 0; j < classes; ++j) {
|
||||||
snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
|
snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
|
||||||
fps[j] = fopen(buff, "w");
|
fps[j] = fopen(buff, "w");
|
||||||
@ -482,11 +488,11 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||||||
|
|
||||||
int nthreads = 4;
|
int nthreads = 4;
|
||||||
if (m < 4) nthreads = m;
|
if (m < 4) nthreads = m;
|
||||||
image *val = calloc(nthreads, sizeof(image));
|
image* val = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *val_resized = calloc(nthreads, sizeof(image));
|
image* val_resized = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *buf = calloc(nthreads, sizeof(image));
|
image* buf = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *buf_resized = calloc(nthreads, sizeof(image));
|
image* buf_resized = (image*)calloc(nthreads, sizeof(image));
|
||||||
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
|
pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
|
||||||
|
|
||||||
load_args args = { 0 };
|
load_args args = { 0 };
|
||||||
args.w = net.w;
|
args.w = net.w;
|
||||||
@ -702,11 +708,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
|
|||||||
|
|
||||||
int nthreads = 4;
|
int nthreads = 4;
|
||||||
if (m < 4) nthreads = m;
|
if (m < 4) nthreads = m;
|
||||||
image *val = calloc(nthreads, sizeof(image));
|
image* val = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *val_resized = calloc(nthreads, sizeof(image));
|
image* val_resized = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *buf = calloc(nthreads, sizeof(image));
|
image* buf = (image*)calloc(nthreads, sizeof(image));
|
||||||
image *buf_resized = calloc(nthreads, sizeof(image));
|
image* buf_resized = (image*)calloc(nthreads, sizeof(image));
|
||||||
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
|
pthread_t* thr = (pthread_t*)calloc(nthreads, sizeof(pthread_t));
|
||||||
|
|
||||||
load_args args = { 0 };
|
load_args args = { 0 };
|
||||||
args.w = net.w;
|
args.w = net.w;
|
||||||
@ -720,11 +726,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
|
|||||||
int tp_for_thresh = 0;
|
int tp_for_thresh = 0;
|
||||||
int fp_for_thresh = 0;
|
int fp_for_thresh = 0;
|
||||||
|
|
||||||
box_prob *detections = calloc(1, sizeof(box_prob));
|
box_prob* detections = (box_prob*)calloc(1, sizeof(box_prob));
|
||||||
int detections_count = 0;
|
int detections_count = 0;
|
||||||
int unique_truth_count = 0;
|
int unique_truth_count = 0;
|
||||||
|
|
||||||
int *truth_classes_count = calloc(classes, sizeof(int));
|
int* truth_classes_count = (int*)calloc(classes, sizeof(int));
|
||||||
|
|
||||||
for (t = 0; t < nthreads; ++t) {
|
for (t = 0; t < nthreads; ++t) {
|
||||||
args.path = paths[i + t];
|
args.path = paths[i + t];
|
||||||
@ -798,7 +804,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
|
|||||||
float prob = dets[i].prob[class_id];
|
float prob = dets[i].prob[class_id];
|
||||||
if (prob > 0) {
|
if (prob > 0) {
|
||||||
detections_count++;
|
detections_count++;
|
||||||
detections = realloc(detections, detections_count * sizeof(box_prob));
|
detections = (box_prob*)realloc(detections, detections_count * sizeof(box_prob));
|
||||||
detections[detections_count - 1].b = dets[i].bbox;
|
detections[detections_count - 1].b = dets[i].bbox;
|
||||||
detections[detections_count - 1].p = prob;
|
detections[detections_count - 1].p = prob;
|
||||||
detections[detections_count - 1].image_index = image_index;
|
detections[detections_count - 1].image_index = image_index;
|
||||||
@ -890,14 +896,14 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
|
|||||||
} pr_t;
|
} pr_t;
|
||||||
|
|
||||||
// for PR-curve
|
// for PR-curve
|
||||||
pr_t **pr = calloc(classes, sizeof(pr_t*));
|
pr_t** pr = (pr_t**)calloc(classes, sizeof(pr_t*));
|
||||||
for (i = 0; i < classes; ++i) {
|
for (i = 0; i < classes; ++i) {
|
||||||
pr[i] = calloc(detections_count, sizeof(pr_t));
|
pr[i] = (pr_t*)calloc(detections_count, sizeof(pr_t));
|
||||||
}
|
}
|
||||||
printf("\n detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count);
|
printf("\n detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count);
|
||||||
|
|
||||||
|
|
||||||
int *truth_flags = calloc(unique_truth_count, sizeof(int));
|
int* truth_flags = (int*)calloc(unique_truth_count, sizeof(int));
|
||||||
|
|
||||||
int rank;
|
int rank;
|
||||||
for (rank = 0; rank < detections_count; ++rank) {
|
for (rank = 0; rank < detections_count; ++rank) {
|
||||||
@ -993,7 +999,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
|
|||||||
if (reinforcement_fd != NULL) fclose(reinforcement_fd);
|
if (reinforcement_fd != NULL) fclose(reinforcement_fd);
|
||||||
|
|
||||||
// free memory
|
// free memory
|
||||||
free_ptrs(names, net.layers[net.n - 1].classes);
|
free_ptrs((void**)names, net.layers[net.n - 1].classes);
|
||||||
free_list_contents_kvp(options);
|
free_list_contents_kvp(options);
|
||||||
free_list(options);
|
free_list(options);
|
||||||
|
|
||||||
@ -1043,7 +1049,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
//float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 };
|
//float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 };
|
||||||
float *rel_width_height_array = calloc(1000, sizeof(float));
|
float* rel_width_height_array = (float*)calloc(1000, sizeof(float));
|
||||||
|
|
||||||
|
|
||||||
list *options = read_data_cfg(datacfg);
|
list *options = read_data_cfg(datacfg);
|
||||||
@ -1079,7 +1085,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
|
|||||||
if (check_mistakes) getchar();
|
if (check_mistakes) getchar();
|
||||||
}
|
}
|
||||||
number_of_boxes++;
|
number_of_boxes++;
|
||||||
rel_width_height_array = realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
|
rel_width_height_array = (float*)realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float));
|
||||||
rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width;
|
rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width;
|
||||||
rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height;
|
rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height;
|
||||||
printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes);
|
printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes);
|
||||||
@ -1104,7 +1110,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
|
|||||||
// K-means
|
// K-means
|
||||||
anchors_data = do_kmeans(boxes_data, num_of_clusters);
|
anchors_data = do_kmeans(boxes_data, num_of_clusters);
|
||||||
|
|
||||||
qsort(anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), anchors_data_comparator);
|
qsort((void*)anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), (__compar_fn_t)anchors_data_comparator);
|
||||||
|
|
||||||
//gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66
|
//gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66
|
||||||
//float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 };
|
//float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 };
|
||||||
@ -1285,8 +1291,8 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
|||||||
layer l = net.layers[net.n - 1];
|
layer l = net.layers[net.n - 1];
|
||||||
|
|
||||||
//box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
|
//box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
|
||||||
//float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
|
//float **probs = calloc(l.w*l.h*l.n, sizeof(float*));
|
||||||
//for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
|
//for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
|
||||||
|
|
||||||
float *X = sized.data;
|
float *X = sized.data;
|
||||||
|
|
||||||
@ -1365,7 +1371,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
|||||||
}
|
}
|
||||||
|
|
||||||
// free memory
|
// free memory
|
||||||
free_ptrs(names, net.layers[net.n - 1].classes);
|
free_ptrs((void**)names, net.layers[net.n - 1].classes);
|
||||||
free_list_contents_kvp(options);
|
free_list_contents_kvp(options);
|
||||||
free_list(options);
|
free_list(options);
|
||||||
|
|
||||||
@ -1421,7 +1427,7 @@ void run_detector(int argc, char **argv)
|
|||||||
for (i = 0; i < len; ++i) {
|
for (i = 0; i < len; ++i) {
|
||||||
if (gpu_list[i] == ',') ++ngpus;
|
if (gpu_list[i] == ',') ++ngpus;
|
||||||
}
|
}
|
||||||
gpus = calloc(ngpus, sizeof(int));
|
gpus = (int*)calloc(ngpus, sizeof(int));
|
||||||
for (i = 0; i < ngpus; ++i) {
|
for (i = 0; i < ngpus; ++i) {
|
||||||
gpus[i] = atoi(gpu_list);
|
gpus[i] = atoi(gpu_list);
|
||||||
gpu_list = strchr(gpu_list, ',') + 1;
|
gpu_list = strchr(gpu_list, ',') + 1;
|
||||||
|
@ -9,7 +9,7 @@ void train_dice(char *cfgfile, char *weightfile)
|
|||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char* backup_directory = "backup/";
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
network net = parse_network_cfg(cfgfile);
|
network net = parse_network_cfg(cfgfile);
|
||||||
if(weightfile){
|
if(weightfile){
|
||||||
|
@ -6,13 +6,13 @@
|
|||||||
|
|
||||||
dropout_layer make_dropout_layer(int batch, int inputs, float probability)
|
dropout_layer make_dropout_layer(int batch, int inputs, float probability)
|
||||||
{
|
{
|
||||||
dropout_layer l = {0};
|
dropout_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = DROPOUT;
|
l.type = DROPOUT;
|
||||||
l.probability = probability;
|
l.probability = probability;
|
||||||
l.inputs = inputs;
|
l.inputs = inputs;
|
||||||
l.outputs = inputs;
|
l.outputs = inputs;
|
||||||
l.batch = batch;
|
l.batch = batch;
|
||||||
l.rand = calloc(inputs*batch, sizeof(float));
|
l.rand = (float*)calloc(inputs * batch, sizeof(float));
|
||||||
l.scale = 1./(1.-probability);
|
l.scale = 1./(1.-probability);
|
||||||
l.forward = forward_dropout_layer;
|
l.forward = forward_dropout_layer;
|
||||||
l.backward = backward_dropout_layer;
|
l.backward = backward_dropout_layer;
|
||||||
@ -27,7 +27,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
|
|||||||
|
|
||||||
void resize_dropout_layer(dropout_layer *l, int inputs)
|
void resize_dropout_layer(dropout_layer *l, int inputs)
|
||||||
{
|
{
|
||||||
l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float));
|
l->rand = (float*)realloc(l->rand, l->inputs * l->batch * sizeof(float));
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
cuda_free(l->rand_gpu);
|
cuda_free(l->rand_gpu);
|
||||||
|
|
||||||
|
@ -6,6 +6,9 @@
|
|||||||
|
|
||||||
typedef layer dropout_layer;
|
typedef layer dropout_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
dropout_layer make_dropout_layer(int batch, int inputs, float probability);
|
dropout_layer make_dropout_layer(int batch, int inputs, float probability);
|
||||||
|
|
||||||
void forward_dropout_layer(dropout_layer l, network_state state);
|
void forward_dropout_layer(dropout_layer l, network_state state);
|
||||||
@ -16,5 +19,8 @@ void resize_dropout_layer(dropout_layer *l, int inputs);
|
|||||||
void forward_dropout_layer_gpu(dropout_layer l, network_state state);
|
void forward_dropout_layer_gpu(dropout_layer l, network_state state);
|
||||||
void backward_dropout_layer_gpu(dropout_layer l, network_state state);
|
void backward_dropout_layer_gpu(dropout_layer l, network_state state);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,11 +2,9 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "dropout_layer.h"
|
#include "dropout_layer.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale)
|
__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale)
|
||||||
{
|
{
|
||||||
|
66
src/gemm.c
66
src/gemm.c
@ -7,7 +7,10 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#endif
|
#endif
|
||||||
@ -37,7 +40,7 @@ void gemm_bin(int M, int N, int K, float ALPHA,
|
|||||||
float *random_matrix(int rows, int cols)
|
float *random_matrix(int rows, int cols)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
float *m = calloc(rows*cols, sizeof(float));
|
float* m = (float*)calloc(rows * cols, sizeof(float));
|
||||||
for(i = 0; i < rows*cols; ++i){
|
for(i = 0; i < rows*cols; ++i){
|
||||||
m[i] = (float)rand()/RAND_MAX;
|
m[i] = (float)rand()/RAND_MAX;
|
||||||
}
|
}
|
||||||
@ -83,7 +86,6 @@ void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
// XNOR bitwise GEMM for binary neural network
|
// XNOR bitwise GEMM for binary neural network
|
||||||
//--------------------------------------------
|
//--------------------------------------------
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
static inline unsigned char xnor(unsigned char a, unsigned char b) {
|
static inline unsigned char xnor(unsigned char a, unsigned char b) {
|
||||||
//return a == b;
|
//return a == b;
|
||||||
@ -318,6 +320,7 @@ void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef GPU
|
||||||
uint8_t reverse_8_bit(uint8_t a) {
|
uint8_t reverse_8_bit(uint8_t a) {
|
||||||
return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16;
|
return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16;
|
||||||
}
|
}
|
||||||
@ -465,6 +468,9 @@ void transpose_bin(char *A, char *B, const int n, const int m,
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#else
|
||||||
|
extern void transpose_32x32_bits_reversed_diagonale(uint32_t* A, uint32_t* B, int m, int n);
|
||||||
|
#endif
|
||||||
|
|
||||||
// transpose by 32-bit
|
// transpose by 32-bit
|
||||||
void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
|
void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
|
||||||
@ -483,7 +489,7 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
|
|||||||
//transpose_32x32_bits_my(&A[a_index/32], &B[b_index/32], lda/32, ldb/32);
|
//transpose_32x32_bits_my(&A[a_index/32], &B[b_index/32], lda/32, ldb/32);
|
||||||
}
|
}
|
||||||
for (; j < m; ++j) {
|
for (; j < m; ++j) {
|
||||||
if (get_bit(A, i*lda + j)) set_bit(B, j*ldb + i);
|
if (get_bit((const unsigned char* const)A, i * lda + j)) set_bit((unsigned char* const)B, j * ldb + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -703,7 +709,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
|
|||||||
else {
|
else {
|
||||||
for (i = 0; i < M; ++i) {
|
for (i = 0; i < M; ++i) {
|
||||||
for (k = 0; k < K; ++k) {
|
for (k = 0; k < K; ++k) {
|
||||||
register float A_PART = ALPHA*A[i*lda + k];
|
float A_PART = ALPHA * A[i * lda + k];
|
||||||
for (j = 0; j < N; ++j) {
|
for (j = 0; j < N; ++j) {
|
||||||
C[i*ldc + j] += A_PART*B[k*ldb + j];
|
C[i*ldc + j] += A_PART*B[k*ldb + j];
|
||||||
}
|
}
|
||||||
@ -730,9 +736,6 @@ void gemm_nn(int M, int N, int K, float ALPHA,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define TILE_M 4 // 4 ops
|
|
||||||
#define TILE_N 16 // AVX2 = 2 ops * 8 floats
|
|
||||||
#define TILE_K 16 // loop
|
|
||||||
|
|
||||||
void gemm_nn_fast(int M, int N, int K, float ALPHA,
|
void gemm_nn_fast(int M, int N, int K, float ALPHA,
|
||||||
float *A, int lda,
|
float *A, int lda,
|
||||||
@ -1286,16 +1289,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline float im2col_get_pixel(float *im, int height, int width, int channels,
|
|
||||||
int row, int col, int channel, int pad)
|
|
||||||
{
|
|
||||||
row -= pad;
|
|
||||||
col -= pad;
|
|
||||||
|
|
||||||
if (row < 0 || col < 0 ||
|
|
||||||
row >= height || col >= width) return 0;
|
|
||||||
return im[col + width*(row + height*channel)];
|
|
||||||
}
|
|
||||||
|
|
||||||
//From Berkeley Vision's Caffe!
|
//From Berkeley Vision's Caffe!
|
||||||
//https://github.com/BVLC/caffe/blob/master/LICENSE
|
//https://github.com/BVLC/caffe/blob/master/LICENSE
|
||||||
@ -1645,7 +1639,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
__m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS);
|
__m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS);
|
||||||
uint16_t mask = _mm256_movemask_ps(result256); // bit i = (val[i] > 0) ? 1 : 0
|
uint16_t mask = _mm256_movemask_ps(result256); // bit i = (val[i] > 0) ? 1 : 0
|
||||||
|
|
||||||
uint16_t *dst_ptr = &((unsigned char*)data_col)[col_index / 8];
|
uint16_t* dst_ptr = (uint16_t*)&((unsigned char*)data_col)[col_index / 8]; // index data_col as bytes, then cast: a uint16_t index would double the byte offset
|
||||||
*dst_ptr |= (mask << (col_index % 8));
|
*dst_ptr |= (mask << (col_index % 8));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1657,7 +1651,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
|
//data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
|
||||||
float val = data_im[im_col + width*(im_row + height*c_im)];
|
float val = data_im[im_col + width*(im_row + height*c_im)];
|
||||||
if(val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char* const)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1671,7 +1665,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char* const)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1685,7 +1679,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char* const)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1699,7 +1693,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char* const)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1713,7 +1707,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char* const)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1952,7 +1946,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
|
|||||||
int i, j, k;
|
int i, j, k;
|
||||||
for (i = 0; i < M; ++i) {
|
for (i = 0; i < M; ++i) {
|
||||||
for (k = 0; k < K; ++k) {
|
for (k = 0; k < K; ++k) {
|
||||||
register float A_PART = ALPHA*A[i*lda + k];
|
float A_PART = ALPHA * A[i * lda + k];
|
||||||
for (j = 0; j < N; ++j) {
|
for (j = 0; j < N; ++j) {
|
||||||
C[i*ldc + j] += A_PART*B[k*ldb + j];
|
C[i*ldc + j] += A_PART*B[k*ldb + j];
|
||||||
}
|
}
|
||||||
@ -2239,7 +2233,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
int col_index = c * new_ldb + h * width_col + w;
|
int col_index = c * new_ldb + h * width_col + w;
|
||||||
|
|
||||||
float val = data_im[im_col + width*(im_row + height*c_im)];
|
float val = data_im[im_col + width*(im_row + height*c_im)];
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char*)data_col, col_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; w < width_col - pad; ++w) {
|
for (; w < width_col - pad; ++w) {
|
||||||
@ -2250,7 +2244,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
|
//data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)];
|
||||||
float val = data_im[im_col + width*(im_row + height*c_im)];
|
float val = data_im[im_col + width*(im_row + height*c_im)];
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char*)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2264,7 +2258,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char*)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2278,7 +2272,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char*)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2292,7 +2286,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char*)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2306,7 +2300,7 @@ void im2col_cpu_custom_bin(float* data_im,
|
|||||||
|
|
||||||
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
//data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad);
|
||||||
if (val > 0) set_bit(data_col, col_index);
|
if (val > 0) set_bit((unsigned char*)data_col, col_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2346,7 +2340,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size)
|
|||||||
memset(dst, 0, dst_size);
|
memset(dst, 0, dst_size);
|
||||||
|
|
||||||
size_t i;
|
size_t i;
|
||||||
char *byte_arr = calloc(size, sizeof(char));
|
char* byte_arr = (char*)calloc(size, sizeof(char));
|
||||||
for (i = 0; i < size; ++i) {
|
for (i = 0; i < size; ++i) {
|
||||||
if (src[i] > 0) byte_arr[i] = 1;
|
if (src[i] > 0) byte_arr[i] = 1;
|
||||||
}
|
}
|
||||||
@ -2578,7 +2572,7 @@ void gemm_nt(int M, int N, int K, float ALPHA,
|
|||||||
int i,j,k;
|
int i,j,k;
|
||||||
for(i = 0; i < M; ++i){
|
for(i = 0; i < M; ++i){
|
||||||
for(j = 0; j < N; ++j){
|
for(j = 0; j < N; ++j){
|
||||||
register float sum = 0;
|
float sum = 0;
|
||||||
for(k = 0; k < K; ++k){
|
for(k = 0; k < K; ++k){
|
||||||
sum += ALPHA*A[i*lda+k]*B[j*ldb + k];
|
sum += ALPHA*A[i*lda+k]*B[j*ldb + k];
|
||||||
}
|
}
|
||||||
@ -2595,7 +2589,7 @@ void gemm_tn(int M, int N, int K, float ALPHA,
|
|||||||
int i,j,k;
|
int i,j,k;
|
||||||
for(i = 0; i < M; ++i){
|
for(i = 0; i < M; ++i){
|
||||||
for(k = 0; k < K; ++k){
|
for(k = 0; k < K; ++k){
|
||||||
register float A_PART = ALPHA*A[k*lda+i];
|
float A_PART = ALPHA * A[k * lda + i];
|
||||||
for(j = 0; j < N; ++j){
|
for(j = 0; j < N; ++j){
|
||||||
C[i*ldc+j] += A_PART*B[k*ldb+j];
|
C[i*ldc+j] += A_PART*B[k*ldb+j];
|
||||||
}
|
}
|
||||||
@ -2611,7 +2605,7 @@ void gemm_tt(int M, int N, int K, float ALPHA,
|
|||||||
int i,j,k;
|
int i,j,k;
|
||||||
for(i = 0; i < M; ++i){
|
for(i = 0; i < M; ++i){
|
||||||
for(j = 0; j < N; ++j){
|
for(j = 0; j < N; ++j){
|
||||||
register float sum = 0;
|
float sum = 0;
|
||||||
for(k = 0; k < K; ++k){
|
for(k = 0; k < K; ++k){
|
||||||
sum += ALPHA*A[i+k*lda]*B[k+j*ldb];
|
sum += ALPHA*A[i+k*lda]*B[k+j*ldb];
|
||||||
}
|
}
|
||||||
@ -2668,9 +2662,9 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
float *C_gpu, int ldc)
|
float *C_gpu, int ldc)
|
||||||
{
|
{
|
||||||
cublasHandle_t handle = blas_handle();
|
cublasHandle_t handle = blas_handle();
|
||||||
cudaError_t stream_status = cublasSetStream(handle, get_cuda_stream());
|
cudaError_t stream_status = (cudaError_t)cublasSetStream(handle, get_cuda_stream());
|
||||||
CHECK_CUDA(stream_status);
|
CHECK_CUDA(stream_status);
|
||||||
cudaError_t status = cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N),
|
cudaError_t status = (cudaError_t)cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N),
|
||||||
(TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
|
(TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
|
||||||
CHECK_CUDA(status);
|
CHECK_CUDA(status);
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,9 @@
|
|||||||
#include "activations.h"
|
#include "activations.h"
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
|
void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
|
||||||
float *weights, float *input, float *output, float *mean);
|
float *weights, float *input, float *output, float *mean);
|
||||||
@ -56,6 +59,7 @@ void im2col_cpu_custom_transpose(float* data_im,
|
|||||||
|
|
||||||
void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a);
|
void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a);
|
||||||
|
|
||||||
|
LIB_API void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n);
|
||||||
|
|
||||||
void gemm_bin(int M, int N, int K, float ALPHA,
|
void gemm_bin(int M, int N, int K, float ALPHA,
|
||||||
char *A, int lda,
|
char *A, int lda,
|
||||||
@ -109,4 +113,7 @@ void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
float BETA,
|
float BETA,
|
||||||
float *C, int ldc);
|
float *C, int ldc);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
1690
src/getopt.c
1690
src/getopt.c
File diff suppressed because it is too large
Load Diff
317
src/getopt.h
317
src/getopt.h
@ -1,133 +1,228 @@
|
|||||||
/* Declarations for getopt.
|
#ifdef _WIN32
|
||||||
Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
|
#ifndef __GETOPT_H__
|
||||||
|
/**
|
||||||
|
* DISCLAIMER
|
||||||
|
* This file is part of the mingw-w64 runtime package.
|
||||||
|
*
|
||||||
|
* The mingw-w64 runtime package and its code is distributed in the hope that it
|
||||||
|
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
|
||||||
|
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
|
||||||
|
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
|
||||||
|
*
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose with or without fee is hereby granted, provided that the above
|
||||||
|
* copyright notice and this permission notice appear in all copies.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
*
|
||||||
|
* Sponsored in part by the Defense Advanced Research Projects
|
||||||
|
* Agency (DARPA) and Air Force Research Laboratory, Air Force
|
||||||
|
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
|
||||||
|
*/
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Dieter Baron and Thomas Klausner.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
This file is part of the GNU C Library. Its master source is NOT part of
|
#define __GETOPT_H__
|
||||||
the C library, however. The master source lives in /gd/gnu/lib.
|
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
/* All the headers include this file. */
|
||||||
modify it under the terms of the GNU Library General Public License as
|
#include <crtdefs.h>
|
||||||
published by the Free Software Foundation; either version 2 of the
|
#include <errno.h>
|
||||||
License, or (at your option) any later version.
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#include <Windows.h>
|
||||||
|
|
||||||
The GNU C Library is distributed in the hope that it will be useful,
|
#ifdef __cplusplus
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Library General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Library General Public
|
|
||||||
License along with the GNU C Library; see the file COPYING.LIB. If
|
|
||||||
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
|
|
||||||
Cambridge, MA 02139, USA. */
|
|
||||||
|
|
||||||
#ifndef _GETOPT_H
|
|
||||||
#define _GETOPT_H 1
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* For communication from `getopt' to the caller.
|
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
|
||||||
When `getopt' finds an option that takes an argument,
|
|
||||||
the argument value is returned here.
|
|
||||||
Also, when `ordering' is RETURN_IN_ORDER,
|
|
||||||
each non-option ARGV-element is returned here. */
|
|
||||||
|
|
||||||
extern char *optarg;
|
//extern int optind; /* index of first non-option in argv */
|
||||||
|
//extern int optopt; /* single option character, as parsed */
|
||||||
|
//extern int opterr; /* flag to enable built-in diagnostics... */
|
||||||
|
// /* (user may set to zero, to suppress) */
|
||||||
|
//
|
||||||
|
//extern char *optarg; /* pointer to argument of current option */
|
||||||
|
|
||||||
/* Index in ARGV of the next element to be scanned.
|
#define PRINT_ERROR ((opterr) && (*options != ':'))
|
||||||
This is used for communication to and from the caller
|
|
||||||
and for communication between successive calls to `getopt'.
|
|
||||||
|
|
||||||
On entry to `getopt', zero means this is the first call; initialize.
|
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
|
||||||
|
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
|
||||||
|
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
|
||||||
|
|
||||||
When `getopt' returns EOF, this is the index of the first of the
|
/* return values */
|
||||||
non-option elements that the caller should itself scan.
|
#define BADCH (int)'?'
|
||||||
|
#define BADARG ((*options == ':') ? (int)':' : (int)'?')
|
||||||
|
#define INORDER (int)1
|
||||||
|
|
||||||
Otherwise, `optind' communicates from one call to the next
|
#ifndef __CYGWIN__
|
||||||
how much of ARGV has been scanned so far. */
|
#define __progname __argv[0]
|
||||||
|
|
||||||
extern int optind;
|
|
||||||
|
|
||||||
/* Callers store zero here to inhibit the error message `getopt' prints
|
|
||||||
for unrecognized options. */
|
|
||||||
|
|
||||||
extern int opterr;
|
|
||||||
|
|
||||||
/* Set to an option character which was unrecognized. */
|
|
||||||
|
|
||||||
extern int optopt;
|
|
||||||
|
|
||||||
/* Describe the long-named options requested by the application.
|
|
||||||
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
|
|
||||||
of `struct option' terminated by an element containing a name which is
|
|
||||||
zero.
|
|
||||||
|
|
||||||
The field `has_arg' is:
|
|
||||||
no_argument (or 0) if the option does not take an argument,
|
|
||||||
required_argument (or 1) if the option requires an argument,
|
|
||||||
optional_argument (or 2) if the option takes an optional argument.
|
|
||||||
|
|
||||||
If the field `flag' is not NULL, it points to a variable that is set
|
|
||||||
to the value given in the field `val' when the option is found, but
|
|
||||||
left unchanged if the option is not found.
|
|
||||||
|
|
||||||
To have a long-named option do something other than set an `int' to
|
|
||||||
a compiled-in constant, such as set a value from `optarg', set the
|
|
||||||
option's `flag' field to zero and its `val' field to a nonzero
|
|
||||||
value (the equivalent single-letter option character, if there is
|
|
||||||
one). For long options that have a zero `flag' field, `getopt'
|
|
||||||
returns the contents of the `val' field. */
|
|
||||||
|
|
||||||
struct option
|
|
||||||
{
|
|
||||||
#if defined (__STDC__) && __STDC__
|
|
||||||
const char *name;
|
|
||||||
#else
|
#else
|
||||||
char *name;
|
extern char __declspec(dllimport) * __progname;
|
||||||
#endif
|
#endif
|
||||||
/* has_arg can't be an enum because some compilers complain about
|
|
||||||
type mismatches in all the code that assumes it is an int. */
|
|
||||||
int has_arg;
|
|
||||||
int *flag;
|
|
||||||
int val;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Names for the values of the `has_arg' field of `struct option'. */
|
#ifdef __CYGWIN__
|
||||||
|
static char EMSG[] = "";
|
||||||
|
#else
|
||||||
|
#define EMSG ""
|
||||||
|
#endif
|
||||||
|
|
||||||
#define no_argument 0
|
static int getopt_internal(int, char* const*, const char*,
|
||||||
#define required_argument 1
|
const struct option*, int*, int);
|
||||||
#define optional_argument 2
|
static int parse_long_options(char* const*, const char*,
|
||||||
|
const struct option*, int*, int);
|
||||||
|
static int gcd(int, int);
|
||||||
|
static void permute_args(int, int, int, char* const*);
|
||||||
|
|
||||||
#if defined (__STDC__) && __STDC__
|
static char* place = EMSG; /* option letter processing */
|
||||||
#ifdef __GNU_LIBRARY__
|
|
||||||
/* Many other libraries have conflicting prototypes for getopt, with
|
|
||||||
differences in the consts, in stdlib.h. To avoid compilation
|
|
||||||
errors, only prototype getopt for the GNU C library. */
|
|
||||||
extern int getopt (int argc, char *const *argv, const char *shortopts);
|
|
||||||
#else /* not __GNU_LIBRARY__ */
|
|
||||||
extern int getopt ();
|
|
||||||
#endif /* __GNU_LIBRARY__ */
|
|
||||||
extern int getopt_long (int argc, char *const *argv, const char *shortopts,
|
|
||||||
const struct option *longopts, int *longind);
|
|
||||||
extern int getopt_long_only (int argc, char *const *argv,
|
|
||||||
const char *shortopts,
|
|
||||||
const struct option *longopts, int *longind);
|
|
||||||
|
|
||||||
/* Internal only. Users should not call this directly. */
|
/* XXX: set optreset to 1 rather than these two */
|
||||||
extern int _getopt_internal (int argc, char *const *argv,
|
static int nonopt_start = -1; /* first non option argument (for permute) */
|
||||||
const char *shortopts,
|
static int nonopt_end = -1; /* first option after non options (for permute) */
|
||||||
const struct option *longopts, int *longind,
|
|
||||||
int long_only);
|
|
||||||
#else /* not __STDC__ */
|
|
||||||
extern int getopt ();
|
|
||||||
extern int getopt_long ();
|
|
||||||
extern int getopt_long_only ();
|
|
||||||
|
|
||||||
extern int _getopt_internal ();
|
/* Error messages */
|
||||||
#endif /* __STDC__ */
|
static const char recargchar[] = "option requires an argument -- %c";
|
||||||
|
static const char recargstring[] = "option requires an argument -- %s";
|
||||||
|
static const char ambig[] = "ambiguous option -- %.*s";
|
||||||
|
static const char noarg[] = "option doesn't take an argument -- %.*s";
|
||||||
|
static const char illoptchar[] = "unknown option -- %c";
|
||||||
|
static const char illoptstring[] = "unknown option -- %s";
|
||||||
|
|
||||||
#ifdef __cplusplus
|
static void _vwarnx(const char* fmt, va_list ap);
|
||||||
|
|
||||||
|
static void warnx(const char* fmt, ...);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the greatest common divisor of a and b.
|
||||||
|
*/
|
||||||
|
static int gcd(int a, int b);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Exchange the block from nonopt_start to nonopt_end with the block
|
||||||
|
* from nonopt_end to opt_end (keeping the same order of arguments
|
||||||
|
* in each block).
|
||||||
|
*/
|
||||||
|
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv);
|
||||||
|
|
||||||
|
#ifdef REPLACE_GETOPT
|
||||||
|
/*
|
||||||
|
* getopt --
|
||||||
|
* Parse argc/argv argument vector.
|
||||||
|
*
|
||||||
|
* [eventually this will replace the BSD getopt]
|
||||||
|
*/
|
||||||
|
int getopt(int nargc, char* const* nargv, const char* options);
|
||||||
|
#endif /* REPLACE_GETOPT */
|
||||||
|
|
||||||
|
//extern int getopt(int nargc, char * const *nargv, const char *options);
|
||||||
|
|
||||||
|
#ifdef _BSD_SOURCE
|
||||||
|
/*
|
||||||
|
* BSD adds the non-standard `optreset' feature, for reinitialisation
|
||||||
|
* of `getopt' parsing. We support this feature, for applications which
|
||||||
|
* proclaim their BSD heritage, before including this header; however,
|
||||||
|
* to maintain portability, developers are advised to avoid it.
|
||||||
|
*/
|
||||||
|
#define optreset __mingw_optreset
|
||||||
|
extern int optreset;
|
||||||
|
#endif
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
* POSIX requires the `getopt' API to be specified in `unistd.h';
|
||||||
|
* thus, `unistd.h' includes this header. However, we do not want
|
||||||
|
* to expose the `getopt_long' or `getopt_long_only' APIs, when
|
||||||
|
* included in this manner. Thus, close the standard __GETOPT_H__
|
||||||
|
* declarations block, and open an additional __GETOPT_LONG_H__
|
||||||
|
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
|
||||||
|
* to declare the extended API.
|
||||||
|
*/
|
||||||
|
#endif /* !defined(__GETOPT_H__) */
|
||||||
|
|
||||||
|
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
|
||||||
|
#define __GETOPT_LONG_H__
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* parse_long_options --
|
||||||
|
* Parse long options in argc/argv argument vector.
|
||||||
|
* Returns -1 if short_too is set and the option does not match long_options.
|
||||||
|
*/
|
||||||
|
static int parse_long_options(char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* getopt_internal --
|
||||||
|
* Parse argc/argv argument vector. Called by user level routines.
|
||||||
|
*/
|
||||||
|
static int getopt_internal(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* getopt_long --
|
||||||
|
* Parse argc/argv argument vector.
|
||||||
|
*/
|
||||||
|
int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* getopt_long_only --
|
||||||
|
* Parse argc/argv argument vector.
|
||||||
|
*/
|
||||||
|
int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Previous MinGW implementation had...
|
||||||
|
*/
|
||||||
|
#ifndef HAVE_DECL_GETOPT
|
||||||
|
/*
|
||||||
|
* ...for the long form API only; keep this for compatibility.
|
||||||
|
*/
|
||||||
|
#define HAVE_DECL_GETOPT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* _GETOPT_H */
|
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
|
||||||
|
#endif
|
||||||
|
@ -1,49 +1,43 @@
|
|||||||
|
#ifdef _WIN32
|
||||||
#include "gettimeofday.h"
|
#include "gettimeofday.h"
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, struct timezone *tz)
|
LIB_API int gettimeofday(struct timeval* tp, struct timezone* tzp)
|
||||||
{
|
{
|
||||||
FILETIME ft;
|
static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL);
|
||||||
unsigned __int64 tmpres = 0;
|
SYSTEMTIME system_time;
|
||||||
static int tzflag;
|
FILETIME file_time;
|
||||||
|
uint64_t time;
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
|
GetSystemTime(&system_time);
|
||||||
|
SystemTimeToFileTime(&system_time, &file_time);
|
||||||
|
time = ((uint64_t)file_time.dwLowDateTime);
|
||||||
|
time += ((uint64_t)file_time.dwHighDateTime) << 32;
|
||||||
/*converting file time to unix epoch*/
|
/*converting file time to unix epoch*/
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
tp->tv_sec = (long)((time - EPOCH) / 10000000L);
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
tp->tv_usec = (long)(system_time.wMilliseconds * 1000);
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (NULL != tz)
|
|
||||||
{
|
|
||||||
if (!tzflag)
|
|
||||||
{
|
|
||||||
_tzset();
|
|
||||||
tzflag++;
|
|
||||||
}
|
|
||||||
tz->tz_minuteswest = _timezone / 60;
|
|
||||||
tz->tz_dsttime = _daylight;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIB_API int clock_gettime(int dummy, struct timespec* ct)
|
||||||
|
{
|
||||||
|
LARGE_INTEGER count;
|
||||||
|
|
||||||
|
if (g_first_time) {
|
||||||
|
g_first_time = 0;
|
||||||
|
|
||||||
|
if (0 == QueryPerformanceFrequency(&g_counts_per_sec)) {
|
||||||
|
g_counts_per_sec.QuadPart = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((NULL == ct) || (g_counts_per_sec.QuadPart <= 0) || (0 == QueryPerformanceCounter(&count))) {
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* never worry about timersub type activies again -- from GLIBC and upcased. */
|
ct->tv_sec = count.QuadPart / g_counts_per_sec.QuadPart;
|
||||||
int timersub(struct timeval *a, struct timeval *b, struct timeval *result)
|
ct->tv_nsec = ((count.QuadPart % g_counts_per_sec.QuadPart) * BILLION) / g_counts_per_sec.QuadPart;
|
||||||
{
|
|
||||||
(result)->tv_sec = (a)->tv_sec - (b)->tv_sec;
|
|
||||||
(result)->tv_usec = (a)->tv_usec - (b)->tv_usec;
|
|
||||||
if ((result)->tv_usec < 0) {
|
|
||||||
--(result)->tv_sec;
|
|
||||||
(result)->tv_usec += 1000000;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
@ -1,20 +1,39 @@
|
|||||||
#pragma once
|
#ifdef _WIN32
|
||||||
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#include <Windows.h>
|
||||||
|
#include <Winsock2.h>
|
||||||
|
#include <stdint.h>
|
||||||
#include < time.h >
|
#include < time.h >
|
||||||
#include <windows.h> //I've ommited this line.
|
#include "darknet.h"
|
||||||
#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
|
#define CLOCK_REALTIME (1)
|
||||||
#else
|
#define BILLION (1E9)
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
|
#ifndef timersub
|
||||||
|
#define timersub(a, b, result) \
|
||||||
|
do { \
|
||||||
|
(result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
|
||||||
|
(result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
|
||||||
|
if ((result)->tv_usec < 0) { \
|
||||||
|
--(result)->tv_sec; \
|
||||||
|
(result)->tv_usec += 1000000; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
#endif // timersub
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct timezone
|
static unsigned char g_first_time = 1;
|
||||||
{
|
static LARGE_INTEGER g_counts_per_sec;
|
||||||
int tz_minuteswest; /* minutes W of Greenwich */
|
|
||||||
int tz_dsttime; /* type of dst correction */
|
LIB_API int gettimeofday(struct timeval*, struct timezone*);
|
||||||
};
|
LIB_API int clock_gettime(int, struct timespec*);
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, struct timezone *tz);
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/* never worry about timersub type activies again -- from GLIBC and upcased. */
|
|
||||||
int timersub(struct timeval *a, struct timeval *b, struct timeval *result);
|
|
67
src/go.c
67
src/go.c
@ -5,13 +5,12 @@
|
|||||||
#include "blas.h"
|
#include "blas.h"
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int inverted = 1;
|
int inverted = 1;
|
||||||
int noi = 1;
|
int noi = 1;
|
||||||
//static const int nind = 5;
|
static const unsigned int n_ind = 5;
|
||||||
#define nind 5
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char **data;
|
char **data;
|
||||||
@ -22,7 +21,7 @@ char *fgetgo(FILE *fp)
|
|||||||
{
|
{
|
||||||
if(feof(fp)) return 0;
|
if(feof(fp)) return 0;
|
||||||
size_t size = 94;
|
size_t size = 94;
|
||||||
char *line = malloc(size*sizeof(char));
|
char* line = (char*)malloc(size * sizeof(char));
|
||||||
if(size != fread(line, sizeof(char), size, fp)){
|
if(size != fread(line, sizeof(char), size, fp)){
|
||||||
free(line);
|
free(line);
|
||||||
return 0;
|
return 0;
|
||||||
@ -35,21 +34,21 @@ moves load_go_moves(char *filename)
|
|||||||
{
|
{
|
||||||
moves m;
|
moves m;
|
||||||
m.n = 128;
|
m.n = 128;
|
||||||
m.data = calloc(128, sizeof(char*));
|
m.data = (char**)calloc(128, sizeof(char*));
|
||||||
FILE *fp = fopen(filename, "rb");
|
FILE *fp = fopen(filename, "rb");
|
||||||
int count = 0;
|
int count = 0;
|
||||||
char *line = 0;
|
char *line = 0;
|
||||||
while((line = fgetgo(fp))){
|
while((line = fgetgo(fp))){
|
||||||
if(count >= m.n){
|
if(count >= m.n){
|
||||||
m.n *= 2;
|
m.n *= 2;
|
||||||
m.data = realloc(m.data, m.n*sizeof(char*));
|
m.data = (char**)realloc(m.data, m.n * sizeof(char*));
|
||||||
}
|
}
|
||||||
m.data[count] = line;
|
m.data[count] = line;
|
||||||
++count;
|
++count;
|
||||||
}
|
}
|
||||||
printf("%d\n", count);
|
printf("%d\n", count);
|
||||||
m.n = count;
|
m.n = count;
|
||||||
m.data = realloc(m.data, count*sizeof(char*));
|
m.data = (char**)realloc(m.data, count * sizeof(char*));
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,12 +126,12 @@ void train_go(char *cfgfile, char *weightfile)
|
|||||||
}
|
}
|
||||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||||
|
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char* backup_directory = "backup/";
|
||||||
|
|
||||||
char buff[256];
|
char buff[256];
|
||||||
float *board = calloc(19*19*net.batch, sizeof(float));
|
float* board = (float*)calloc(19 * 19 * net.batch, sizeof(float));
|
||||||
float *move = calloc(19*19*net.batch, sizeof(float));
|
float* move = (float*)calloc(19 * 19 * net.batch, sizeof(float));
|
||||||
moves m = load_go_moves("/home/pjreddie/backup/go.train");
|
moves m = load_go_moves("backup/go.train");
|
||||||
//moves m = load_go_moves("games.txt");
|
//moves m = load_go_moves("games.txt");
|
||||||
|
|
||||||
int N = m.n;
|
int N = m.n;
|
||||||
@ -187,7 +186,7 @@ void propagate_liberty(float *board, int *lib, int *visited, int row, int col, i
|
|||||||
|
|
||||||
int *calculate_liberties(float *board)
|
int *calculate_liberties(float *board)
|
||||||
{
|
{
|
||||||
int *lib = calloc(19*19, sizeof(int));
|
int* lib = (int*)calloc(19 * 19, sizeof(int));
|
||||||
int visited[361];
|
int visited[361];
|
||||||
int i, j;
|
int i, j;
|
||||||
for(j = 0; j < 19; ++j){
|
for(j = 0; j < 19; ++j){
|
||||||
@ -222,7 +221,7 @@ void print_board(float *board, int swap, int *indexes)
|
|||||||
int index = j*19 + i;
|
int index = j*19 + i;
|
||||||
if(indexes){
|
if(indexes){
|
||||||
int found = 0;
|
int found = 0;
|
||||||
for(n = 0; n < nind; ++n){
|
for (n = 0; n < n_ind; ++n) {
|
||||||
if(index == indexes[n]){
|
if(index == indexes[n]){
|
||||||
found = 1;
|
found = 1;
|
||||||
/*
|
/*
|
||||||
@ -365,9 +364,9 @@ int generate_move(network net, int player, float *board, int multi, float thresh
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int indexes[nind];
|
int indexes[n_ind];
|
||||||
top_k(move, 19*19, nind, indexes);
|
top_k(move, 19*19, n_ind, indexes);
|
||||||
if(thresh > move[indexes[0]]) thresh = move[indexes[nind-1]];
|
if(thresh > move[indexes[0]]) thresh = move[indexes[n_ind-1]];
|
||||||
|
|
||||||
for(i = 0; i < 19; ++i){
|
for(i = 0; i < 19; ++i){
|
||||||
for(j = 0; j < 19; ++j){
|
for(j = 0; j < 19; ++j){
|
||||||
@ -382,12 +381,12 @@ int generate_move(network net, int player, float *board, int multi, float thresh
|
|||||||
int index = sample_array(move, 19*19);
|
int index = sample_array(move, 19*19);
|
||||||
|
|
||||||
if(print){
|
if(print){
|
||||||
top_k(move, 19*19, nind, indexes);
|
top_k(move, 19*19, n_ind, indexes);
|
||||||
for(i = 0; i < nind; ++i){
|
for(i = 0; i < n_ind; ++i){
|
||||||
if (!move[indexes[i]]) indexes[i] = -1;
|
if (!move[indexes[i]]) indexes[i] = -1;
|
||||||
}
|
}
|
||||||
print_board(board, player, indexes);
|
print_board(board, player, indexes);
|
||||||
for(i = 0; i < nind; ++i){
|
for(i = 0; i < n_ind; ++i){
|
||||||
fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]);
|
fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -411,9 +410,9 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
|
|||||||
set_batch_network(&net, 1);
|
set_batch_network(&net, 1);
|
||||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||||
|
|
||||||
float *board = calloc(19*19, sizeof(float));
|
float* board = (float*)calloc(19 * 19, sizeof(float));
|
||||||
float *move = calloc(19*19, sizeof(float));
|
float* move = (float*)calloc(19 * 19, sizeof(float));
|
||||||
moves m = load_go_moves("/home/pjreddie/backup/go.test");
|
moves m = load_go_moves("backup/go.test");
|
||||||
|
|
||||||
int N = m.n;
|
int N = m.n;
|
||||||
int i;
|
int i;
|
||||||
@ -439,9 +438,9 @@ void engine_go(char *filename, char *weightfile, int multi)
|
|||||||
}
|
}
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
set_batch_network(&net, 1);
|
set_batch_network(&net, 1);
|
||||||
float *board = calloc(19*19, sizeof(float));
|
float* board = (float*)calloc(19 * 19, sizeof(float));
|
||||||
char *one = calloc(91, sizeof(char));
|
char* one = (char*)calloc(91, sizeof(char));
|
||||||
char *two = calloc(91, sizeof(char));
|
char* two = (char*)calloc(91, sizeof(char));
|
||||||
int passed = 0;
|
int passed = 0;
|
||||||
while(1){
|
while(1){
|
||||||
char buff[256];
|
char buff[256];
|
||||||
@ -612,8 +611,8 @@ void test_go(char *cfg, char *weights, int multi)
|
|||||||
}
|
}
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
set_batch_network(&net, 1);
|
set_batch_network(&net, 1);
|
||||||
float *board = calloc(19*19, sizeof(float));
|
float* board = (float*)calloc(19 * 19, sizeof(float));
|
||||||
float *move = calloc(19*19, sizeof(float));
|
float* move = (float*)calloc(19 * 19, sizeof(float));
|
||||||
int color = 1;
|
int color = 1;
|
||||||
while(1){
|
while(1){
|
||||||
float *output = network_predict(net, board);
|
float *output = network_predict(net, board);
|
||||||
@ -642,11 +641,11 @@ void test_go(char *cfg, char *weights, int multi)
|
|||||||
if(board[i]) move[i] = 0;
|
if(board[i]) move[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int indexes[nind];
|
int indexes[n_ind];
|
||||||
int row, col;
|
int row, col;
|
||||||
top_k(move, 19*19, nind, indexes);
|
top_k(move, 19 * 19, n_ind, indexes);
|
||||||
print_board(board, color, indexes);
|
print_board(board, color, indexes);
|
||||||
for(i = 0; i < nind; ++i){
|
for (i = 0; i < n_ind; ++i) {
|
||||||
int index = indexes[i];
|
int index = indexes[i];
|
||||||
row = index / 19;
|
row = index / 19;
|
||||||
col = index % 19;
|
col = index % 19;
|
||||||
@ -664,7 +663,7 @@ void test_go(char *cfg, char *weights, int multi)
|
|||||||
int cnum = sscanf(line, "%c", &c);
|
int cnum = sscanf(line, "%c", &c);
|
||||||
if (strlen(line) == 0 || dnum) {
|
if (strlen(line) == 0 || dnum) {
|
||||||
--picked;
|
--picked;
|
||||||
if (picked < nind){
|
if (picked < n_ind){
|
||||||
int index = indexes[picked];
|
int index = indexes[picked];
|
||||||
row = index / 19;
|
row = index / 19;
|
||||||
col = index % 19;
|
col = index % 19;
|
||||||
@ -764,9 +763,9 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
|
|||||||
int count = 0;
|
int count = 0;
|
||||||
set_batch_network(&net, 1);
|
set_batch_network(&net, 1);
|
||||||
set_batch_network(&net2, 1);
|
set_batch_network(&net2, 1);
|
||||||
float *board = calloc(19*19, sizeof(float));
|
float* board = (float*)calloc(19 * 19, sizeof(float));
|
||||||
char *one = calloc(91, sizeof(char));
|
char* one = (char*)calloc(91, sizeof(char));
|
||||||
char *two = calloc(91, sizeof(char));
|
char* two = (char*)calloc(91, sizeof(char));
|
||||||
int done = 0;
|
int done = 0;
|
||||||
int player = 1;
|
int player = 1;
|
||||||
int p1 = 0;
|
int p1 = 0;
|
||||||
|
@ -30,42 +30,42 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
|
|||||||
{
|
{
|
||||||
fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs);
|
fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs);
|
||||||
batch = batch / steps;
|
batch = batch / steps;
|
||||||
layer l = {0};
|
layer l = { (LAYER_TYPE)0 };
|
||||||
l.batch = batch;
|
l.batch = batch;
|
||||||
l.type = GRU;
|
l.type = GRU;
|
||||||
l.steps = steps;
|
l.steps = steps;
|
||||||
l.inputs = inputs;
|
l.inputs = inputs;
|
||||||
|
|
||||||
l.input_z_layer = malloc(sizeof(layer));
|
l.input_z_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.input_z_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
*(l.input_z_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
||||||
l.input_z_layer->batch = batch;
|
l.input_z_layer->batch = batch;
|
||||||
|
|
||||||
l.state_z_layer = malloc(sizeof(layer));
|
l.state_z_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.state_z_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
*(l.state_z_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
||||||
l.state_z_layer->batch = batch;
|
l.state_z_layer->batch = batch;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
l.input_r_layer = malloc(sizeof(layer));
|
l.input_r_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.input_r_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
*(l.input_r_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
||||||
l.input_r_layer->batch = batch;
|
l.input_r_layer->batch = batch;
|
||||||
|
|
||||||
l.state_r_layer = malloc(sizeof(layer));
|
l.state_r_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.state_r_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
*(l.state_r_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
||||||
l.state_r_layer->batch = batch;
|
l.state_r_layer->batch = batch;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
l.input_h_layer = malloc(sizeof(layer));
|
l.input_h_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.input_h_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
*(l.input_h_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
||||||
l.input_h_layer->batch = batch;
|
l.input_h_layer->batch = batch;
|
||||||
|
|
||||||
l.state_h_layer = malloc(sizeof(layer));
|
l.state_h_layer = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.state_h_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
*(l.state_h_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
||||||
l.state_h_layer->batch = batch;
|
l.state_h_layer->batch = batch;
|
||||||
@ -74,16 +74,16 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
|
|||||||
|
|
||||||
|
|
||||||
l.outputs = outputs;
|
l.outputs = outputs;
|
||||||
l.output = calloc(outputs*batch*steps, sizeof(float));
|
l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
|
||||||
l.delta = calloc(outputs*batch*steps, sizeof(float));
|
l.delta = (float*)calloc(outputs * batch * steps, sizeof(float));
|
||||||
l.state = calloc(outputs*batch, sizeof(float));
|
l.state = (float*)calloc(outputs * batch, sizeof(float));
|
||||||
l.prev_state = calloc(outputs*batch, sizeof(float));
|
l.prev_state = (float*)calloc(outputs * batch, sizeof(float));
|
||||||
l.forgot_state = calloc(outputs*batch, sizeof(float));
|
l.forgot_state = (float*)calloc(outputs * batch, sizeof(float));
|
||||||
l.forgot_delta = calloc(outputs*batch, sizeof(float));
|
l.forgot_delta = (float*)calloc(outputs * batch, sizeof(float));
|
||||||
|
|
||||||
l.r_cpu = calloc(outputs*batch, sizeof(float));
|
l.r_cpu = (float*)calloc(outputs * batch, sizeof(float));
|
||||||
l.z_cpu = calloc(outputs*batch, sizeof(float));
|
l.z_cpu = (float*)calloc(outputs * batch, sizeof(float));
|
||||||
l.h_cpu = calloc(outputs*batch, sizeof(float));
|
l.h_cpu = (float*)calloc(outputs * batch, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_gru_layer;
|
l.forward = forward_gru_layer;
|
||||||
l.backward = backward_gru_layer;
|
l.backward = backward_gru_layer;
|
||||||
|
@ -6,6 +6,9 @@
|
|||||||
#include "layer.h"
|
#include "layer.h"
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
|
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
|
||||||
|
|
||||||
void forward_gru_layer(layer l, network_state state);
|
void forward_gru_layer(layer l, network_state state);
|
||||||
@ -20,5 +23,8 @@ void push_gru_layer(layer l);
|
|||||||
void pull_gru_layer(layer l);
|
void pull_gru_layer(layer l);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#include "image.h"
|
||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
@ -17,9 +18,10 @@ using std::endl;
|
|||||||
// socket related abstractions:
|
// socket related abstractions:
|
||||||
//
|
//
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
#ifndef USE_CMAKE_LIBS
|
||||||
#pragma comment(lib, "ws2_32.lib")
|
#pragma comment(lib, "ws2_32.lib")
|
||||||
#include <winsock.h>
|
#endif
|
||||||
#include <windows.h>
|
#include "gettimeofday.h"
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#define PORT unsigned long
|
#define PORT unsigned long
|
||||||
#define ADDRPOINTER int*
|
#define ADDRPOINTER int*
|
||||||
@ -44,7 +46,7 @@ static int close_socket(SOCKET s) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
#else // nix
|
#else // nix
|
||||||
#include <unistd.h>
|
#include "darkunistd.h"
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
@ -85,16 +87,15 @@ static int close_socket(SOCKET s) {
|
|||||||
#endif // _WIN32
|
#endif // _WIN32
|
||||||
|
|
||||||
|
|
||||||
#include "opencv2/opencv.hpp"
|
#include <opencv2/opencv.hpp>
|
||||||
#include "opencv2/highgui/highgui.hpp"
|
#include <opencv2/highgui/highgui.hpp>
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#include "opencv2/imgproc/imgproc_c.h"
|
#include <opencv2/imgproc/imgproc_c.h>
|
||||||
#ifndef CV_VERSION_EPOCH
|
#ifndef CV_VERSION_EPOCH
|
||||||
#include "opencv2/videoio/videoio.hpp"
|
#include <opencv2/videoio/videoio.hpp>
|
||||||
#endif
|
#endif
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
#include "image.h"
|
|
||||||
|
|
||||||
|
|
||||||
class MJPG_sender
|
class MJPG_sender
|
||||||
@ -195,7 +196,8 @@ public:
|
|||||||
std::vector<int> params;
|
std::vector<int> params;
|
||||||
params.push_back(IMWRITE_JPEG_QUALITY);
|
params.push_back(IMWRITE_JPEG_QUALITY);
|
||||||
params.push_back(quality);
|
params.push_back(quality);
|
||||||
cv::imencode(".jpg", frame, outbuf, params);
|
//cv::imencode(".jpg", frame, outbuf, params); //REMOVED FOR COMPATIBILITY
|
||||||
|
std::cerr << "cv::imencode call disabled!" << std::endl;
|
||||||
size_t outlen = outbuf.size();
|
size_t outlen = outbuf.size();
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
@ -227,17 +229,17 @@ public:
|
|||||||
}
|
}
|
||||||
maxfd = (maxfd>client ? maxfd : client);
|
maxfd = (maxfd>client ? maxfd : client);
|
||||||
FD_SET(client, &master);
|
FD_SET(client, &master);
|
||||||
_write(client, "HTTP/1.0 200 OK\r\n", 0);
|
_write(client, "HTTP/1.0 200 OK\n", 0);
|
||||||
_write(client,
|
_write(client,
|
||||||
"Server: Mozarella/2.2\r\n"
|
"Server: Mozarella/2.2\n"
|
||||||
"Accept-Range: bytes\r\n"
|
"Accept-Range: bytes\n"
|
||||||
"Connection: close\r\n"
|
"Connection: close\n"
|
||||||
"Max-Age: 0\r\n"
|
"Max-Age: 0\n"
|
||||||
"Expires: 0\r\n"
|
"Expires: 0\n"
|
||||||
"Cache-Control: no-cache, private\r\n"
|
"Cache-Control: no-cache, private\n"
|
||||||
"Pragma: no-cache\r\n"
|
"Pragma: no-cache\n"
|
||||||
"Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n"
|
"Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\n"
|
||||||
"\r\n", 0);
|
"\n", 0);
|
||||||
cerr << "MJPG_sender: new client " << client << endl;
|
cerr << "MJPG_sender: new client " << client << endl;
|
||||||
}
|
}
|
||||||
else // existing client, just stream pix
|
else // existing client, just stream pix
|
||||||
@ -249,7 +251,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
char head[400];
|
char head[400];
|
||||||
sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen);
|
sprintf(head, "--mjpegstream\nContent-Type: image/jpeg\nContent-Length: %zu\n\n", outlen);
|
||||||
_write(s, head, 0);
|
_write(s, head, 0);
|
||||||
int n = _write(s, (char*)(&outbuf[0]), outlen);
|
int n = _write(s, (char*)(&outbuf[0]), outlen);
|
||||||
//cerr << "known client " << s << " " << n << endl;
|
//cerr << "known client " << s << " " << n << endl;
|
||||||
@ -406,18 +408,18 @@ public:
|
|||||||
}
|
}
|
||||||
maxfd = (maxfd>client ? maxfd : client);
|
maxfd = (maxfd>client ? maxfd : client);
|
||||||
FD_SET(client, &master);
|
FD_SET(client, &master);
|
||||||
_write(client, "HTTP/1.0 200 OK\r\n", 0);
|
_write(client, "HTTP/1.0 200 OK\n", 0);
|
||||||
_write(client,
|
_write(client,
|
||||||
"Server: Mozarella/2.2\r\n"
|
"Server: Mozarella/2.2\n"
|
||||||
"Accept-Range: bytes\r\n"
|
"Accept-Range: bytes\n"
|
||||||
"Connection: close\r\n"
|
"Connection: close\n"
|
||||||
"Max-Age: 0\r\n"
|
"Max-Age: 0\n"
|
||||||
"Expires: 0\r\n"
|
"Expires: 0\n"
|
||||||
"Cache-Control: no-cache, private\r\n"
|
"Cache-Control: no-cache, private\n"
|
||||||
"Pragma: no-cache\r\n"
|
"Pragma: no-cache\n"
|
||||||
"Content-Type: application/json\r\n"
|
"Content-Type: application/json\n"
|
||||||
//"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n"
|
//"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n"
|
||||||
"\r\n", 0);
|
"\n", 0);
|
||||||
_write(client, "[\n", 0); // open JSON array
|
_write(client, "[\n", 0); // open JSON array
|
||||||
int n = _write(client, outputbuf, outlen);
|
int n = _write(client, outputbuf, outlen);
|
||||||
cerr << "JSON_sender: new client " << client << endl;
|
cerr << "JSON_sender: new client " << client << endl;
|
||||||
@ -473,7 +475,7 @@ void send_json(detection *dets, int nboxes, int classes, char **names, long long
|
|||||||
|
|
||||||
// ----------------------------------------
|
// ----------------------------------------
|
||||||
|
|
||||||
CvCapture* get_capture_video_stream(char *path) {
|
CvCapture* get_capture_video_stream(const char *path) {
|
||||||
CvCapture* cap = NULL;
|
CvCapture* cap = NULL;
|
||||||
try {
|
try {
|
||||||
cap = (CvCapture*)new cv::VideoCapture(path);
|
cap = (CvCapture*)new cv::VideoCapture(path);
|
||||||
@ -511,7 +513,7 @@ IplImage* get_webcam_frame(CvCapture *cap) {
|
|||||||
src = cvCloneImage(&tmp);
|
src = cvCloneImage(&tmp);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
std::cout << " Video-stream stoped! \n";
|
std::cout << " Video-stream stopped! \n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (...) {
|
catch (...) {
|
||||||
@ -536,9 +538,6 @@ int get_stream_fps_cpp(CvCapture *cap) {
|
|||||||
return fps;
|
return fps;
|
||||||
}
|
}
|
||||||
// ----------------------------------------
|
// ----------------------------------------
|
||||||
extern "C" {
|
|
||||||
image ipl_to_image(IplImage* src); // image.c
|
|
||||||
}
|
|
||||||
|
|
||||||
image image_data_augmentation(IplImage* ipl, int w, int h,
|
image image_data_augmentation(IplImage* ipl, int w, int h,
|
||||||
int pleft, int ptop, int swidth, int sheight, int flip,
|
int pleft, int ptop, int swidth, int sheight, int flip,
|
||||||
@ -701,4 +700,4 @@ void stop_timer_and_show() {
|
|||||||
}
|
}
|
||||||
void stop_timer_and_show_name(char *name) { stop_timer_and_show(); }
|
void stop_timer_and_show_name(char *name) { stop_timer_and_show(); }
|
||||||
void total_time() {}
|
void total_time() {}
|
||||||
#endif // C++11
|
#endif // C++11
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
#pragma once
|
|
||||||
#ifndef HTTP_STREAM_H
|
#ifndef HTTP_STREAM_H
|
||||||
#define HTTP_STREAM_H
|
#define HTTP_STREAM_H
|
||||||
#include "darknet.h"
|
#include "darknet.h"
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/core/version.hpp>
|
||||||
#include "opencv2/imgproc/imgproc_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
|
#include <opencv2/imgproc/imgproc_c.h>
|
||||||
|
#ifndef CV_VERSION_EPOCH
|
||||||
|
#include <opencv2/videoio/videoio_c.h>
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
@ -18,7 +21,7 @@ extern "C" {
|
|||||||
void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout);
|
void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout);
|
||||||
void send_mjpeg(IplImage* ipl, int port, int timeout, int quality);
|
void send_mjpeg(IplImage* ipl, int port, int timeout, int quality);
|
||||||
CvCapture* get_capture_webcam(int index);
|
CvCapture* get_capture_webcam(int index);
|
||||||
CvCapture* get_capture_video_stream(char *path);
|
CvCapture* get_capture_video_stream(const char *path);
|
||||||
IplImage* get_webcam_frame(CvCapture *cap);
|
IplImage* get_webcam_frame(CvCapture *cap);
|
||||||
int get_stream_fps_cpp(CvCapture *cap);
|
int get_stream_fps_cpp(CvCapture *cap);
|
||||||
|
|
||||||
|
@ -5,9 +5,14 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "darknet.h"
|
#include "darknet.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void im2col_cpu(float* data_im,
|
void im2col_cpu(float* data_im,
|
||||||
int channels, int height, int width,
|
int channels, int height, int width,
|
||||||
int ksize, int stride, int pad, float* data_col);
|
int ksize, int stride, int pad, float* data_col);
|
||||||
|
float im2col_get_pixel(float* im, int height, int width, int channels,
|
||||||
|
int row, int col, int channel, int pad);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
|
||||||
@ -63,5 +68,8 @@ void convolve_bin_cpu(float *input, float *weights, float *output, int in_w, int
|
|||||||
|
|
||||||
void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
|
void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,19 +1,15 @@
|
|||||||
#include "cuda_runtime.h"
|
#include <cuda_runtime.h>
|
||||||
#include "curand.h"
|
#include <curand.h>
|
||||||
#include "cublas_v2.h"
|
#include <cublas_v2.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "im2col.h"
|
#include "im2col.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <cuda.h>
|
//#include <cuda.h>
|
||||||
|
|
||||||
#define FULL_MASK 0xffffffff
|
|
||||||
#define WARP_SIZE 32
|
|
||||||
|
|
||||||
template<typename T1, typename T2>
|
template<typename T1, typename T2>
|
||||||
__device__ inline T1 __shfl_custom(T1 val, T2 lane) {
|
__device__ inline T1 __shfl_custom(T1 val, T2 lane) {
|
||||||
@ -154,11 +150,6 @@ __global__ void im2col_align_gpu_kernel(const int n, const float* data_im,
|
|||||||
{
|
{
|
||||||
//__shared__ float tmp_s[1];
|
//__shared__ float tmp_s[1];
|
||||||
|
|
||||||
//#define SHRED_VALS ((BLOCK / 169) * )
|
|
||||||
//__shared__ float dst_s[1024];
|
|
||||||
//__shared__ float dst_s[1024];
|
|
||||||
//__shared__ uint32_t bit_s[32];
|
|
||||||
//__shared__ uint8_t bit_s[128];
|
|
||||||
|
|
||||||
int index = blockIdx.x*blockDim.x + threadIdx.x;
|
int index = blockIdx.x*blockDim.x + threadIdx.x;
|
||||||
for (; index < n; index += blockDim.x*gridDim.x) {
|
for (; index < n; index += blockDim.x*gridDim.x) {
|
||||||
@ -604,8 +595,7 @@ __device__ void transpose32_optimized(uint32_t A[32]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define BLOCK_TRANSPOSE32 256
|
extern "C" {
|
||||||
|
|
||||||
__device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n)
|
__device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n)
|
||||||
{
|
{
|
||||||
//unsigned A_tmp[32];
|
//unsigned A_tmp[32];
|
||||||
@ -626,7 +616,7 @@ __device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B
|
|||||||
#pragma unroll 32
|
#pragma unroll 32
|
||||||
for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i];
|
for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// transpose 32x32 bit
|
// transpose 32x32 bit
|
||||||
__global__ void transpose_bin_gpu_kernel_32(uint32_t *A, uint32_t *B, const int n, const int m,
|
__global__ void transpose_bin_gpu_kernel_32(uint32_t *A, uint32_t *B, const int n, const int m,
|
||||||
|
45
src/image.c
45
src/image.c
@ -1,4 +1,3 @@
|
|||||||
#include "darknet.h"
|
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "blas.h"
|
#include "blas.h"
|
||||||
@ -6,25 +5,31 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
#ifndef STB_IMAGE_IMPLEMENTATION
|
||||||
#define STB_IMAGE_IMPLEMENTATION
|
#define STB_IMAGE_IMPLEMENTATION
|
||||||
#include "stb_image.h"
|
#include "stb_image.h"
|
||||||
|
#endif
|
||||||
|
#ifndef STB_IMAGE_WRITE_IMPLEMENTATION
|
||||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||||
#include "stb_image_write.h"
|
#include "stb_image_write.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#include "opencv2/imgproc/imgproc_c.h"
|
#include <opencv2/imgproc/imgproc_c.h>
|
||||||
#include "opencv2/core/types_c.h"
|
#include <opencv2/core/types_c.h>
|
||||||
#include "opencv2/core/version.hpp"
|
#include <opencv2/core/version.hpp>
|
||||||
#ifndef CV_VERSION_EPOCH
|
#ifndef CV_VERSION_EPOCH
|
||||||
#include "opencv2/videoio/videoio_c.h"
|
#include <opencv2/videoio/videoio_c.h>
|
||||||
#include "opencv2/imgcodecs/imgcodecs_c.h"
|
#include <opencv2/imgcodecs/imgcodecs_c.h>
|
||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
#endif
|
#endif
|
||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
|
|
||||||
|
#ifndef CV_RGB
|
||||||
#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
|
#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
extern int check_mistakes;
|
extern int check_mistakes;
|
||||||
int windows = 0;
|
int windows = 0;
|
||||||
@ -255,9 +260,9 @@ image **load_alphabet()
|
|||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
const int nsize = 8;
|
const int nsize = 8;
|
||||||
image **alphabets = calloc(nsize, sizeof(image));
|
image** alphabets = (image**)calloc(nsize, sizeof(image*));
|
||||||
for(j = 0; j < nsize; ++j){
|
for(j = 0; j < nsize; ++j){
|
||||||
alphabets[j] = calloc(128, sizeof(image));
|
alphabets[j] = (image*)calloc(128, sizeof(image));
|
||||||
for(i = 32; i < 127; ++i){
|
for(i = 32; i < 127; ++i){
|
||||||
char buff[256];
|
char buff[256];
|
||||||
sprintf(buff, "data/labels/%d_%d.png", i, j);
|
sprintf(buff, "data/labels/%d_%d.png", i, j);
|
||||||
@ -273,7 +278,7 @@ image **load_alphabet()
|
|||||||
detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names)
|
detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names)
|
||||||
{
|
{
|
||||||
int selected_num = 0;
|
int selected_num = 0;
|
||||||
detection_with_class* result_arr = calloc(dets_num, sizeof(detection_with_class));
|
detection_with_class* result_arr = (detection_with_class*)calloc(dets_num, sizeof(detection_with_class));
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < dets_num; ++i) {
|
for (i = 0; i < dets_num; ++i) {
|
||||||
int best_class = -1;
|
int best_class = -1;
|
||||||
@ -505,7 +510,7 @@ void save_cv_png(IplImage *img, const char *name)
|
|||||||
IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
|
IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
|
||||||
cvCvtColor(img, img_rgb, CV_RGB2BGR);
|
cvCvtColor(img, img_rgb, CV_RGB2BGR);
|
||||||
stbi_write_png(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 0);
|
stbi_write_png(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 0);
|
||||||
cvRelease(&img_rgb);
|
cvRelease((void**)&img_rgb);
|
||||||
}
|
}
|
||||||
|
|
||||||
void save_cv_jpg(IplImage *img, const char *name)
|
void save_cv_jpg(IplImage *img, const char *name)
|
||||||
@ -513,7 +518,7 @@ void save_cv_jpg(IplImage *img, const char *name)
|
|||||||
IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
|
IplImage* img_rgb = cvCreateImage(cvSize(img->width, img->height), 8, 3);
|
||||||
cvCvtColor(img, img_rgb, CV_RGB2BGR);
|
cvCvtColor(img, img_rgb, CV_RGB2BGR);
|
||||||
stbi_write_jpg(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 80);
|
stbi_write_jpg(name, img_rgb->width, img_rgb->height, 3, (char *)img_rgb->imageData, 80);
|
||||||
cvRelease(&img_rgb);
|
cvRelease((void**)&img_rgb);
|
||||||
}
|
}
|
||||||
|
|
||||||
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output)
|
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output)
|
||||||
@ -952,8 +957,8 @@ void normalize_image(image p)
|
|||||||
|
|
||||||
void normalize_image2(image p)
|
void normalize_image2(image p)
|
||||||
{
|
{
|
||||||
float *min = calloc(p.c, sizeof(float));
|
float* min = (float*)calloc(p.c, sizeof(float));
|
||||||
float *max = calloc(p.c, sizeof(float));
|
float* max = (float*)calloc(p.c, sizeof(float));
|
||||||
int i,j;
|
int i,j;
|
||||||
for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w];
|
for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w];
|
||||||
|
|
||||||
@ -982,7 +987,7 @@ void normalize_image2(image p)
|
|||||||
image copy_image(image p)
|
image copy_image(image p)
|
||||||
{
|
{
|
||||||
image copy = p;
|
image copy = p;
|
||||||
copy.data = calloc(p.h*p.w*p.c, sizeof(float));
|
copy.data = (float*)calloc(p.h * p.w * p.c, sizeof(float));
|
||||||
memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float));
|
memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float));
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
@ -1252,7 +1257,7 @@ void save_image_png(image im, const char *name)
|
|||||||
char buff[256];
|
char buff[256];
|
||||||
//sprintf(buff, "%s (%d)", name, windows);
|
//sprintf(buff, "%s (%d)", name, windows);
|
||||||
sprintf(buff, "%s.png", name);
|
sprintf(buff, "%s.png", name);
|
||||||
unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
|
unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
|
||||||
int i,k;
|
int i,k;
|
||||||
for(k = 0; k < im.c; ++k){
|
for(k = 0; k < im.c; ++k){
|
||||||
for(i = 0; i < im.w*im.h; ++i){
|
for(i = 0; i < im.w*im.h; ++i){
|
||||||
@ -1273,7 +1278,7 @@ void save_image_options(image im, const char *name, IMTYPE f, int quality)
|
|||||||
else if (f == TGA) sprintf(buff, "%s.tga", name);
|
else if (f == TGA) sprintf(buff, "%s.tga", name);
|
||||||
else if (f == JPG) sprintf(buff, "%s.jpg", name);
|
else if (f == JPG) sprintf(buff, "%s.jpg", name);
|
||||||
else sprintf(buff, "%s.png", name);
|
else sprintf(buff, "%s.png", name);
|
||||||
unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
|
unsigned char* data = (unsigned char*)calloc(im.w * im.h * im.c, sizeof(unsigned char));
|
||||||
int i, k;
|
int i, k;
|
||||||
for (k = 0; k < im.c; ++k) {
|
for (k = 0; k < im.c; ++k) {
|
||||||
for (i = 0; i < im.w*im.h; ++i) {
|
for (i = 0; i < im.w*im.h; ++i) {
|
||||||
@ -1331,14 +1336,14 @@ image make_empty_image(int w, int h, int c)
|
|||||||
image make_image(int w, int h, int c)
|
image make_image(int w, int h, int c)
|
||||||
{
|
{
|
||||||
image out = make_empty_image(w,h,c);
|
image out = make_empty_image(w,h,c);
|
||||||
out.data = calloc(h*w*c, sizeof(float));
|
out.data = (float*)calloc(h * w * c, sizeof(float));
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
image make_random_image(int w, int h, int c)
|
image make_random_image(int w, int h, int c)
|
||||||
{
|
{
|
||||||
image out = make_empty_image(w,h,c);
|
image out = make_empty_image(w,h,c);
|
||||||
out.data = calloc(h*w*c, sizeof(float));
|
out.data = (float*)calloc(h * w * c, sizeof(float));
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < w*h*c; ++i){
|
for(i = 0; i < w*h*c; ++i){
|
||||||
out.data[i] = (rand_normal() * .25) + .5;
|
out.data[i] = (rand_normal() * .25) + .5;
|
||||||
@ -1585,7 +1590,7 @@ image random_augment_image(image im, float angle, float aspect, int low, int hig
|
|||||||
int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
|
int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
|
||||||
float scale = (float)r / min;
|
float scale = (float)r / min;
|
||||||
|
|
||||||
float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
|
float rad = rand_uniform(-angle, angle) * 2.0 * M_PI / 360.;
|
||||||
|
|
||||||
float dx = (im.w*scale/aspect - size) / 2.;
|
float dx = (im.w*scale/aspect - size) / 2.;
|
||||||
float dy = (im.h*scale - size) / 2.;
|
float dy = (im.h*scale - size) / 2.;
|
||||||
|
22
src/image.h
22
src/image.h
@ -7,8 +7,20 @@
|
|||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#ifdef OPENCV
|
||||||
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
|
#include <opencv2/imgproc/imgproc_c.h>
|
||||||
|
#include <opencv2/core/types_c.h>
|
||||||
|
#include <opencv2/core/version.hpp>
|
||||||
|
#ifndef CV_VERSION_EPOCH
|
||||||
|
#include <opencv2/videoio/videoio_c.h>
|
||||||
|
#include <opencv2/imgcodecs/imgcodecs_c.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#include "box.h"
|
#include "box.h"
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int w;
|
int w;
|
||||||
@ -80,6 +92,11 @@ image load_image(char *filename, int w, int h, int c);
|
|||||||
//LIB_API image load_image_color(char *filename, int w, int h);
|
//LIB_API image load_image_color(char *filename, int w, int h);
|
||||||
image **load_alphabet();
|
image **load_alphabet();
|
||||||
|
|
||||||
|
#ifdef OPENCV
|
||||||
|
LIB_API image get_image_from_stream(CvCapture* cap);
|
||||||
|
LIB_API image get_image_from_stream_cpp(CvCapture* cap);
|
||||||
|
LIB_API image ipl_to_image(IplImage* src);
|
||||||
|
#endif
|
||||||
//float get_pixel(image m, int x, int y, int c);
|
//float get_pixel(image m, int x, int y, int c);
|
||||||
//float get_pixel_extend(image m, int x, int y, int c);
|
//float get_pixel_extend(image m, int x, int y, int c);
|
||||||
//void set_pixel(image m, int x, int y, int c, float val);
|
//void set_pixel(image m, int x, int y, int c, float val);
|
||||||
@ -90,5 +107,8 @@ image get_image_layer(image m, int l);
|
|||||||
|
|
||||||
//LIB_API void free_image(image m);
|
//LIB_API void free_image(image m);
|
||||||
void test_resize(char *filename);
|
void test_resize(char *filename);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -98,7 +98,7 @@ void free_layer(layer l)
|
|||||||
if (l.x_gpu) cuda_free(l.x_gpu);
|
if (l.x_gpu) cuda_free(l.x_gpu);
|
||||||
if (l.x_norm_gpu) cuda_free(l.x_norm_gpu);
|
if (l.x_norm_gpu) cuda_free(l.x_norm_gpu);
|
||||||
|
|
||||||
if (l.align_bit_weights_gpu) cuda_free(l.align_bit_weights_gpu);
|
if (l.align_bit_weights_gpu) cuda_free((float *)l.align_bit_weights_gpu);
|
||||||
if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu);
|
if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu);
|
||||||
if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu);
|
if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu);
|
||||||
if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu);
|
if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu);
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
#ifndef BASE_LAYER_H
|
#ifndef BASE_LAYER_H
|
||||||
#define BASE_LAYER_H
|
#define BASE_LAYER_H
|
||||||
|
|
||||||
#include "darknet.h"
|
|
||||||
#include "activations.h"
|
#include "activations.h"
|
||||||
#include "stddef.h"
|
#include "stddef.h"
|
||||||
#include "tree.h"
|
#include "tree.h"
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
//struct network_state;
|
//struct network_state;
|
||||||
|
|
||||||
@ -330,4 +332,7 @@ struct layer{
|
|||||||
*/
|
*/
|
||||||
//void free_layer(layer);
|
//void free_layer(layer);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
22
src/list.c
22
src/list.c
@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
list *make_list()
|
list *make_list()
|
||||||
{
|
{
|
||||||
list *l = malloc(sizeof(list));
|
list* l = (list*)malloc(sizeof(list));
|
||||||
l->size = 0;
|
l->size = 0;
|
||||||
l->front = 0;
|
l->front = 0;
|
||||||
l->back = 0;
|
l->back = 0;
|
||||||
@ -40,18 +40,18 @@ void *list_pop(list *l){
|
|||||||
|
|
||||||
void list_insert(list *l, void *val)
|
void list_insert(list *l, void *val)
|
||||||
{
|
{
|
||||||
node *new = malloc(sizeof(node));
|
node* newnode = (node*)malloc(sizeof(node));
|
||||||
new->val = val;
|
newnode->val = val;
|
||||||
new->next = 0;
|
newnode->next = 0;
|
||||||
|
|
||||||
if(!l->back){
|
if(!l->back){
|
||||||
l->front = new;
|
l->front = newnode;
|
||||||
new->prev = 0;
|
newnode->prev = 0;
|
||||||
}else{
|
}else{
|
||||||
l->back->next = new;
|
l->back->next = newnode;
|
||||||
new->prev = l->back;
|
newnode->prev = l->back;
|
||||||
}
|
}
|
||||||
l->back = new;
|
l->back = newnode;
|
||||||
++l->size;
|
++l->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,7 +84,7 @@ void free_list_contents_kvp(list *l)
|
|||||||
{
|
{
|
||||||
node *n = l->front;
|
node *n = l->front;
|
||||||
while (n) {
|
while (n) {
|
||||||
kvp *p = n->val;
|
kvp* p = (kvp*)n->val;
|
||||||
free(p->key);
|
free(p->key);
|
||||||
free(n->val);
|
free(n->val);
|
||||||
n = n->next;
|
n = n->next;
|
||||||
@ -93,7 +93,7 @@ void free_list_contents_kvp(list *l)
|
|||||||
|
|
||||||
void **list_to_array(list *l)
|
void **list_to_array(list *l)
|
||||||
{
|
{
|
||||||
void **a = calloc(l->size, sizeof(void*));
|
void** a = (void**)calloc(l->size, sizeof(void*));
|
||||||
int count = 0;
|
int count = 0;
|
||||||
node *n = l->front;
|
node *n = l->front;
|
||||||
while(n){
|
while(n){
|
||||||
|
@ -13,6 +13,9 @@ typedef struct list{
|
|||||||
node *back;
|
node *back;
|
||||||
} list;
|
} list;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
list *make_list();
|
list *make_list();
|
||||||
int list_find(list *l, void *val);
|
int list_find(list *l, void *val);
|
||||||
|
|
||||||
@ -24,4 +27,7 @@ void free_list(list *l);
|
|||||||
void free_list_contents(list *l);
|
void free_list_contents(list *l);
|
||||||
void free_list_contents_kvp(list *l);
|
void free_list_contents_kvp(list *l);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -26,7 +26,7 @@ int local_out_width(local_layer l)
|
|||||||
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
|
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
local_layer l = {0};
|
local_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = LOCAL;
|
l.type = LOCAL;
|
||||||
|
|
||||||
l.h = h;
|
l.h = h;
|
||||||
@ -47,19 +47,19 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
|
|||||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||||
l.inputs = l.w * l.h * l.c;
|
l.inputs = l.w * l.h * l.c;
|
||||||
|
|
||||||
l.weights = calloc(c*n*size*size*locations, sizeof(float));
|
l.weights = (float*)calloc(c * n * size * size * locations, sizeof(float));
|
||||||
l.weight_updates = calloc(c*n*size*size*locations, sizeof(float));
|
l.weight_updates = (float*)calloc(c * n * size * size * locations, sizeof(float));
|
||||||
|
|
||||||
l.biases = calloc(l.outputs, sizeof(float));
|
l.biases = (float*)calloc(l.outputs, sizeof(float));
|
||||||
l.bias_updates = calloc(l.outputs, sizeof(float));
|
l.bias_updates = (float*)calloc(l.outputs, sizeof(float));
|
||||||
|
|
||||||
// float scale = 1./sqrt(size*size*c);
|
// float scale = 1./sqrt(size*size*c);
|
||||||
float scale = sqrt(2./(size*size*c));
|
float scale = sqrt(2./(size*size*c));
|
||||||
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);
|
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);
|
||||||
|
|
||||||
l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
|
l.col_image = (float*)calloc(out_h * out_w * size * size * c, sizeof(float));
|
||||||
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
|
||||||
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_local_layer;
|
l.forward = forward_local_layer;
|
||||||
l.backward = backward_local_layer;
|
l.backward = backward_local_layer;
|
||||||
|
@ -9,6 +9,9 @@
|
|||||||
|
|
||||||
typedef layer local_layer;
|
typedef layer local_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_local_layer_gpu(local_layer layer, network_state state);
|
void forward_local_layer_gpu(local_layer layer, network_state state);
|
||||||
void backward_local_layer_gpu(local_layer layer, network_state state);
|
void backward_local_layer_gpu(local_layer layer, network_state state);
|
||||||
@ -27,5 +30,8 @@ void update_local_layer(local_layer layer, int batch, float learning_rate, float
|
|||||||
void bias_output(float *output, float *biases, int batch, int n, int size);
|
void bias_output(float *output, float *biases, int batch, int n, int size);
|
||||||
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
|
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -30,7 +30,7 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
|
|||||||
{
|
{
|
||||||
fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs);
|
fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs);
|
||||||
batch = batch / steps;
|
batch = batch / steps;
|
||||||
layer l = { 0 };
|
layer l = { (LAYER_TYPE)0 };
|
||||||
l.batch = batch;
|
l.batch = batch;
|
||||||
l.type = LSTM;
|
l.type = LSTM;
|
||||||
l.steps = steps;
|
l.steps = steps;
|
||||||
@ -39,49 +39,49 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
|
|||||||
l.out_h = 1;
|
l.out_h = 1;
|
||||||
l.out_c = outputs;
|
l.out_c = outputs;
|
||||||
|
|
||||||
l.uf = malloc(sizeof(layer));
|
l.uf = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
*(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
||||||
l.uf->batch = batch;
|
l.uf->batch = batch;
|
||||||
if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
|
if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
|
||||||
|
|
||||||
l.ui = malloc(sizeof(layer));
|
l.ui = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
*(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
||||||
l.ui->batch = batch;
|
l.ui->batch = batch;
|
||||||
if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
|
if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
|
||||||
|
|
||||||
l.ug = malloc(sizeof(layer));
|
l.ug = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
*(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
||||||
l.ug->batch = batch;
|
l.ug->batch = batch;
|
||||||
if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
|
if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
|
||||||
|
|
||||||
l.uo = malloc(sizeof(layer));
|
l.uo = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
*(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize);
|
||||||
l.uo->batch = batch;
|
l.uo->batch = batch;
|
||||||
if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
|
if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
|
||||||
|
|
||||||
l.wf = malloc(sizeof(layer));
|
l.wf = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
*(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
||||||
l.wf->batch = batch;
|
l.wf->batch = batch;
|
||||||
if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
|
if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
|
||||||
|
|
||||||
l.wi = malloc(sizeof(layer));
|
l.wi = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
*(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
||||||
l.wi->batch = batch;
|
l.wi->batch = batch;
|
||||||
if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
|
if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
|
||||||
|
|
||||||
l.wg = malloc(sizeof(layer));
|
l.wg = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
*(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
||||||
l.wg->batch = batch;
|
l.wg->batch = batch;
|
||||||
if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
|
if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
|
||||||
|
|
||||||
l.wo = malloc(sizeof(layer));
|
l.wo = (layer*)malloc(sizeof(layer));
|
||||||
fprintf(stderr, "\t\t");
|
fprintf(stderr, "\t\t");
|
||||||
*(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
*(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize);
|
||||||
l.wo->batch = batch;
|
l.wo->batch = batch;
|
||||||
@ -90,27 +90,27 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
|
|||||||
l.batch_normalize = batch_normalize;
|
l.batch_normalize = batch_normalize;
|
||||||
l.outputs = outputs;
|
l.outputs = outputs;
|
||||||
|
|
||||||
l.output = calloc(outputs*batch*steps, sizeof(float));
|
l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
|
||||||
l.state = calloc(outputs*batch, sizeof(float));
|
l.state = (float*)calloc(outputs * batch, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_lstm_layer;
|
l.forward = forward_lstm_layer;
|
||||||
l.update = update_lstm_layer;
|
l.update = update_lstm_layer;
|
||||||
|
|
||||||
l.prev_state_cpu = calloc(batch*outputs, sizeof(float));
|
l.prev_state_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.prev_cell_cpu = calloc(batch*outputs, sizeof(float));
|
l.prev_cell_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.cell_cpu = calloc(batch*outputs*steps, sizeof(float));
|
l.cell_cpu = (float*)calloc(batch*outputs*steps, sizeof(float));
|
||||||
|
|
||||||
l.f_cpu = calloc(batch*outputs, sizeof(float));
|
l.f_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.i_cpu = calloc(batch*outputs, sizeof(float));
|
l.i_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.g_cpu = calloc(batch*outputs, sizeof(float));
|
l.g_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.o_cpu = calloc(batch*outputs, sizeof(float));
|
l.o_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.c_cpu = calloc(batch*outputs, sizeof(float));
|
l.c_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.h_cpu = calloc(batch*outputs, sizeof(float));
|
l.h_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.temp_cpu = calloc(batch*outputs, sizeof(float));
|
l.temp_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.temp2_cpu = calloc(batch*outputs, sizeof(float));
|
l.temp2_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.temp3_cpu = calloc(batch*outputs, sizeof(float));
|
l.temp3_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.dc_cpu = calloc(batch*outputs, sizeof(float));
|
l.dc_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
l.dh_cpu = calloc(batch*outputs, sizeof(float));
|
l.dh_cpu = (float*)calloc(batch*outputs, sizeof(float));
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
l.forward_gpu = forward_lstm_layer_gpu;
|
l.forward_gpu = forward_lstm_layer_gpu;
|
||||||
|
@ -6,7 +6,10 @@
|
|||||||
#include "network.h"
|
#include "network.h"
|
||||||
#define USET
|
#define USET
|
||||||
|
|
||||||
layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
LIB_API layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
|
||||||
|
|
||||||
void forward_lstm_layer(layer l, network_state state);
|
void forward_lstm_layer(layer l, network_state state);
|
||||||
void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay);
|
void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||||
@ -15,6 +18,9 @@ void update_lstm_layer(layer l, int batch, float learning_rate, float momentum,
|
|||||||
void forward_lstm_layer_gpu(layer l, network_state state);
|
void forward_lstm_layer_gpu(layer l, network_state state);
|
||||||
void backward_lstm_layer_gpu(layer l, network_state state);
|
void backward_lstm_layer_gpu(layer l, network_state state);
|
||||||
void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
|
void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
28
src/matrix.c
28
src/matrix.c
@ -15,7 +15,7 @@ void free_matrix(matrix m)
|
|||||||
|
|
||||||
float matrix_topk_accuracy(matrix truth, matrix guess, int k)
|
float matrix_topk_accuracy(matrix truth, matrix guess, int k)
|
||||||
{
|
{
|
||||||
int *indexes = calloc(k, sizeof(int));
|
int* indexes = (int*)calloc(k, sizeof(int));
|
||||||
int n = truth.cols;
|
int n = truth.cols;
|
||||||
int i,j;
|
int i,j;
|
||||||
int correct = 0;
|
int correct = 0;
|
||||||
@ -48,15 +48,15 @@ matrix resize_matrix(matrix m, int size)
|
|||||||
int i;
|
int i;
|
||||||
if (m.rows == size) return m;
|
if (m.rows == size) return m;
|
||||||
if (m.rows < size) {
|
if (m.rows < size) {
|
||||||
m.vals = realloc(m.vals, size*sizeof(float*));
|
m.vals = (float**)realloc(m.vals, size * sizeof(float*));
|
||||||
for (i = m.rows; i < size; ++i) {
|
for (i = m.rows; i < size; ++i) {
|
||||||
m.vals[i] = calloc(m.cols, sizeof(float));
|
m.vals[i] = (float*)calloc(m.cols, sizeof(float));
|
||||||
}
|
}
|
||||||
} else if (m.rows > size) {
|
} else if (m.rows > size) {
|
||||||
for (i = size; i < m.rows; ++i) {
|
for (i = size; i < m.rows; ++i) {
|
||||||
free(m.vals[i]);
|
free(m.vals[i]);
|
||||||
}
|
}
|
||||||
m.vals = realloc(m.vals, size*sizeof(float*));
|
m.vals = (float**)realloc(m.vals, size * sizeof(float*));
|
||||||
}
|
}
|
||||||
m.rows = size;
|
m.rows = size;
|
||||||
return m;
|
return m;
|
||||||
@ -79,9 +79,9 @@ matrix make_matrix(int rows, int cols)
|
|||||||
matrix m;
|
matrix m;
|
||||||
m.rows = rows;
|
m.rows = rows;
|
||||||
m.cols = cols;
|
m.cols = cols;
|
||||||
m.vals = calloc(m.rows, sizeof(float *));
|
m.vals = (float**)calloc(m.rows, sizeof(float*));
|
||||||
for(i = 0; i < m.rows; ++i){
|
for(i = 0; i < m.rows; ++i){
|
||||||
m.vals[i] = calloc(m.cols, sizeof(float));
|
m.vals[i] = (float*)calloc(m.cols, sizeof(float));
|
||||||
}
|
}
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
@ -92,7 +92,7 @@ matrix hold_out_matrix(matrix *m, int n)
|
|||||||
matrix h;
|
matrix h;
|
||||||
h.rows = n;
|
h.rows = n;
|
||||||
h.cols = m->cols;
|
h.cols = m->cols;
|
||||||
h.vals = calloc(h.rows, sizeof(float *));
|
h.vals = (float**)calloc(h.rows, sizeof(float*));
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
int index = rand()%m->rows;
|
int index = rand()%m->rows;
|
||||||
h.vals[i] = m->vals[index];
|
h.vals[i] = m->vals[index];
|
||||||
@ -103,7 +103,7 @@ matrix hold_out_matrix(matrix *m, int n)
|
|||||||
|
|
||||||
float *pop_column(matrix *m, int c)
|
float *pop_column(matrix *m, int c)
|
||||||
{
|
{
|
||||||
float *col = calloc(m->rows, sizeof(float));
|
float* col = (float*)calloc(m->rows, sizeof(float));
|
||||||
int i, j;
|
int i, j;
|
||||||
for(i = 0; i < m->rows; ++i){
|
for(i = 0; i < m->rows; ++i){
|
||||||
col[i] = m->vals[i][c];
|
col[i] = m->vals[i][c];
|
||||||
@ -127,18 +127,18 @@ matrix csv_to_matrix(char *filename)
|
|||||||
|
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int size = 1024;
|
int size = 1024;
|
||||||
m.vals = calloc(size, sizeof(float*));
|
m.vals = (float**)calloc(size, sizeof(float*));
|
||||||
while((line = fgetl(fp))){
|
while((line = fgetl(fp))){
|
||||||
if(m.cols == -1) m.cols = count_fields(line);
|
if(m.cols == -1) m.cols = count_fields(line);
|
||||||
if(n == size){
|
if(n == size){
|
||||||
size *= 2;
|
size *= 2;
|
||||||
m.vals = realloc(m.vals, size*sizeof(float*));
|
m.vals = (float**)realloc(m.vals, size * sizeof(float*));
|
||||||
}
|
}
|
||||||
m.vals[n] = parse_fields(line, m.cols);
|
m.vals[n] = parse_fields(line, m.cols);
|
||||||
free(line);
|
free(line);
|
||||||
++n;
|
++n;
|
||||||
}
|
}
|
||||||
m.vals = realloc(m.vals, n*sizeof(float*));
|
m.vals = (float**)realloc(m.vals, n * sizeof(float*));
|
||||||
m.rows = n;
|
m.rows = n;
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
@ -225,7 +225,7 @@ void kmeans_maximization(matrix data, int *assignments, matrix centers)
|
|||||||
matrix old_centers = make_matrix(centers.rows, centers.cols);
|
matrix old_centers = make_matrix(centers.rows, centers.cols);
|
||||||
|
|
||||||
int i, j;
|
int i, j;
|
||||||
int *counts = calloc(centers.rows, sizeof(int));
|
int *counts = (int*)calloc(centers.rows, sizeof(int));
|
||||||
for (i = 0; i < centers.rows; ++i) {
|
for (i = 0; i < centers.rows; ++i) {
|
||||||
for (j = 0; j < centers.cols; ++j) {
|
for (j = 0; j < centers.cols; ++j) {
|
||||||
old_centers.vals[i][j] = centers.vals[i][j];
|
old_centers.vals[i][j] = centers.vals[i][j];
|
||||||
@ -268,7 +268,7 @@ void random_centers(matrix data, matrix centers) {
|
|||||||
int *sample(int n)
|
int *sample(int n)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int *s = calloc(n, sizeof(int));
|
int* s = (int*)calloc(n, sizeof(int));
|
||||||
for (i = 0; i < n; ++i) s[i] = i;
|
for (i = 0; i < n; ++i) s[i] = i;
|
||||||
for (i = n - 1; i >= 0; --i) {
|
for (i = n - 1; i >= 0; --i) {
|
||||||
int swap = s[i];
|
int swap = s[i];
|
||||||
@ -301,7 +301,7 @@ void copy(float *x, float *y, int n)
|
|||||||
model do_kmeans(matrix data, int k)
|
model do_kmeans(matrix data, int k)
|
||||||
{
|
{
|
||||||
matrix centers = make_matrix(k, data.cols);
|
matrix centers = make_matrix(k, data.cols);
|
||||||
int *assignments = calloc(data.rows, sizeof(int));
|
int* assignments = (int*)calloc(data.rows, sizeof(int));
|
||||||
//smart_centers(data, centers);
|
//smart_centers(data, centers);
|
||||||
random_centers(data, centers); // IoU = 67.31% after kmeans
|
random_centers(data, centers); // IoU = 67.31% after kmeans
|
||||||
|
|
||||||
|
@ -12,6 +12,9 @@ typedef struct {
|
|||||||
matrix centers;
|
matrix centers;
|
||||||
} model;
|
} model;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
model do_kmeans(matrix data, int k);
|
model do_kmeans(matrix data, int k);
|
||||||
matrix make_matrix(int rows, int cols);
|
matrix make_matrix(int rows, int cols);
|
||||||
@ -28,4 +31,7 @@ matrix resize_matrix(matrix m, int size);
|
|||||||
|
|
||||||
float *pop_column(matrix *m, int c);
|
float *pop_column(matrix *m, int c);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -47,7 +47,7 @@ void cudnn_maxpool_setup(layer *l)
|
|||||||
|
|
||||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
|
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
|
||||||
{
|
{
|
||||||
maxpool_layer l = {0};
|
maxpool_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = MAXPOOL;
|
l.type = MAXPOOL;
|
||||||
l.batch = batch;
|
l.batch = batch;
|
||||||
l.h = h;
|
l.h = h;
|
||||||
@ -62,9 +62,9 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
|
|||||||
l.size = size;
|
l.size = size;
|
||||||
l.stride = stride;
|
l.stride = stride;
|
||||||
int output_size = l.out_h * l.out_w * l.out_c * batch;
|
int output_size = l.out_h * l.out_w * l.out_c * batch;
|
||||||
l.indexes = calloc(output_size, sizeof(int));
|
l.indexes = (int*)calloc(output_size, sizeof(int));
|
||||||
l.output = calloc(output_size, sizeof(float));
|
l.output = (float*)calloc(output_size, sizeof(float));
|
||||||
l.delta = calloc(output_size, sizeof(float));
|
l.delta = (float*)calloc(output_size, sizeof(float));
|
||||||
l.forward = forward_maxpool_layer;
|
l.forward = forward_maxpool_layer;
|
||||||
l.backward = backward_maxpool_layer;
|
l.backward = backward_maxpool_layer;
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
@ -93,9 +93,9 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
|
|||||||
l->outputs = l->out_w * l->out_h * l->c;
|
l->outputs = l->out_w * l->out_h * l->c;
|
||||||
int output_size = l->outputs * l->batch;
|
int output_size = l->outputs * l->batch;
|
||||||
|
|
||||||
l->indexes = realloc(l->indexes, output_size * sizeof(int));
|
l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int));
|
||||||
l->output = realloc(l->output, output_size * sizeof(float));
|
l->output = (float*)realloc(l->output, output_size * sizeof(float));
|
||||||
l->delta = realloc(l->delta, output_size * sizeof(float));
|
l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
CHECK_CUDA(cudaFree((float *)l->indexes_gpu));
|
CHECK_CUDA(cudaFree((float *)l->indexes_gpu));
|
||||||
|
@ -8,6 +8,9 @@
|
|||||||
|
|
||||||
typedef layer maxpool_layer;
|
typedef layer maxpool_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
image get_maxpool_image(maxpool_layer l);
|
image get_maxpool_image(maxpool_layer l);
|
||||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
|
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
|
||||||
void resize_maxpool_layer(maxpool_layer *l, int w, int h);
|
void resize_maxpool_layer(maxpool_layer *l, int w, int h);
|
||||||
@ -20,5 +23,8 @@ void backward_maxpool_layer_gpu(maxpool_layer l, network_state state);
|
|||||||
void cudnn_maxpool_setup(maxpool_layer *l);
|
void cudnn_maxpool_setup(maxpool_layer *l);
|
||||||
#endif // GPU
|
#endif // GPU
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -2,10 +2,8 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "maxpool_layer.h"
|
#include "maxpool_layer.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
|
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
|
||||||
{
|
{
|
||||||
|
@ -180,16 +180,16 @@ network make_network(int n)
|
|||||||
{
|
{
|
||||||
network net = {0};
|
network net = {0};
|
||||||
net.n = n;
|
net.n = n;
|
||||||
net.layers = calloc(net.n, sizeof(layer));
|
net.layers = (layer*)calloc(net.n, sizeof(layer));
|
||||||
net.seen = calloc(1, sizeof(uint64_t));
|
net.seen = (uint64_t*)calloc(1, sizeof(uint64_t));
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
net.input_gpu = calloc(1, sizeof(float *));
|
net.input_gpu = (float**)calloc(1, sizeof(float*));
|
||||||
net.truth_gpu = calloc(1, sizeof(float *));
|
net.truth_gpu = (float**)calloc(1, sizeof(float*));
|
||||||
|
|
||||||
net.input16_gpu = calloc(1, sizeof(float *));
|
net.input16_gpu = (float**)calloc(1, sizeof(float*));
|
||||||
net.output16_gpu = calloc(1, sizeof(float *));
|
net.output16_gpu = (float**)calloc(1, sizeof(float*));
|
||||||
net.max_input16_size = calloc(1, sizeof(size_t));
|
net.max_input16_size = (size_t*)calloc(1, sizeof(size_t));
|
||||||
net.max_output16_size = calloc(1, sizeof(size_t));
|
net.max_output16_size = (size_t*)calloc(1, sizeof(size_t));
|
||||||
#endif
|
#endif
|
||||||
return net;
|
return net;
|
||||||
}
|
}
|
||||||
@ -300,8 +300,8 @@ float train_network_datum(network net, float *x, float *y)
|
|||||||
float train_network_sgd(network net, data d, int n)
|
float train_network_sgd(network net, data d, int n)
|
||||||
{
|
{
|
||||||
int batch = net.batch;
|
int batch = net.batch;
|
||||||
float *X = calloc(batch*d.X.cols, sizeof(float));
|
float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
|
||||||
float *y = calloc(batch*d.y.cols, sizeof(float));
|
float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
@ -320,8 +320,8 @@ float train_network(network net, data d)
|
|||||||
assert(d.X.rows % net.batch == 0);
|
assert(d.X.rows % net.batch == 0);
|
||||||
int batch = net.batch;
|
int batch = net.batch;
|
||||||
int n = d.X.rows / batch;
|
int n = d.X.rows / batch;
|
||||||
float *X = calloc(batch*d.X.cols, sizeof(float));
|
float* X = (float*)calloc(batch * d.X.cols, sizeof(float));
|
||||||
float *y = calloc(batch*d.y.cols, sizeof(float));
|
float* y = (float*)calloc(batch * d.y.cols, sizeof(float));
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
@ -389,11 +389,11 @@ int recalculate_workspace_size(network *net)
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
free(net->workspace);
|
free(net->workspace);
|
||||||
net->workspace = calloc(1, workspace_size);
|
net->workspace = (float*)calloc(1, workspace_size);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
free(net->workspace);
|
free(net->workspace);
|
||||||
net->workspace = calloc(1, workspace_size);
|
net->workspace = (float*)calloc(1, workspace_size);
|
||||||
#endif
|
#endif
|
||||||
//fprintf(stderr, " Done!\n");
|
//fprintf(stderr, " Done!\n");
|
||||||
return 0;
|
return 0;
|
||||||
@ -495,19 +495,19 @@ int resize_network(network *net, int w, int h)
|
|||||||
net->input_pinned_cpu_flag = 1;
|
net->input_pinned_cpu_flag = 1;
|
||||||
else {
|
else {
|
||||||
cudaGetLastError(); // reset CUDA-error
|
cudaGetLastError(); // reset CUDA-error
|
||||||
net->input_pinned_cpu = calloc(size, sizeof(float));
|
net->input_pinned_cpu = (float*)calloc(size, sizeof(float));
|
||||||
net->input_pinned_cpu_flag = 0;
|
net->input_pinned_cpu_flag = 0;
|
||||||
}
|
}
|
||||||
printf(" CUDA allocate done! \n");
|
printf(" CUDA allocate done! \n");
|
||||||
}else {
|
}else {
|
||||||
free(net->workspace);
|
free(net->workspace);
|
||||||
net->workspace = calloc(1, workspace_size);
|
net->workspace = (float*)calloc(1, workspace_size);
|
||||||
if(!net->input_pinned_cpu_flag)
|
if(!net->input_pinned_cpu_flag)
|
||||||
net->input_pinned_cpu = realloc(net->input_pinned_cpu, size * sizeof(float));
|
net->input_pinned_cpu = (float*)realloc(net->input_pinned_cpu, size * sizeof(float));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
free(net->workspace);
|
free(net->workspace);
|
||||||
net->workspace = calloc(1, workspace_size);
|
net->workspace = (float*)calloc(1, workspace_size);
|
||||||
#endif
|
#endif
|
||||||
//fprintf(stderr, " Done!\n");
|
//fprintf(stderr, " Done!\n");
|
||||||
return 0;
|
return 0;
|
||||||
@ -534,7 +534,7 @@ detection_layer get_network_detection_layer(network net)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Detection layer not found!!\n");
|
fprintf(stderr, "Detection layer not found!!\n");
|
||||||
detection_layer l = {0};
|
detection_layer l = { (LAYER_TYPE)0 };
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -632,11 +632,11 @@ detection *make_network_boxes(network *net, float thresh, int *num)
|
|||||||
int i;
|
int i;
|
||||||
int nboxes = num_detections(net, thresh);
|
int nboxes = num_detections(net, thresh);
|
||||||
if (num) *num = nboxes;
|
if (num) *num = nboxes;
|
||||||
detection *dets = calloc(nboxes, sizeof(detection));
|
detection* dets = (detection*)calloc(nboxes, sizeof(detection));
|
||||||
for (i = 0; i < nboxes; ++i) {
|
for (i = 0; i < nboxes; ++i) {
|
||||||
dets[i].prob = calloc(l.classes, sizeof(float));
|
dets[i].prob = (float*)calloc(l.classes, sizeof(float));
|
||||||
if (l.coords > 4) {
|
if (l.coords > 4) {
|
||||||
dets[i].mask = calloc(l.coords - 4, sizeof(float));
|
dets[i].mask = (float*)calloc(l.coords - 4, sizeof(float));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return dets;
|
return dets;
|
||||||
@ -645,10 +645,10 @@ detection *make_network_boxes(network *net, float thresh, int *num)
|
|||||||
|
|
||||||
void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter)
|
void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter)
|
||||||
{
|
{
|
||||||
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
|
box* boxes = (box*)calloc(l.w * l.h * l.n, sizeof(box));
|
||||||
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
|
float** probs = (float**)calloc(l.w * l.h * l.n, sizeof(float*));
|
||||||
int i, j;
|
int i, j;
|
||||||
for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float));
|
for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)calloc(l.classes, sizeof(float));
|
||||||
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map);
|
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map);
|
||||||
for (j = 0; j < l.w*l.h*l.n; ++j) {
|
for (j = 0; j < l.w*l.h*l.n; ++j) {
|
||||||
dets[j].classes = l.classes;
|
dets[j].classes = l.classes;
|
||||||
@ -789,7 +789,7 @@ matrix network_predict_data_multi(network net, data test, int n)
|
|||||||
int i,j,b,m;
|
int i,j,b,m;
|
||||||
int k = get_network_output_size(net);
|
int k = get_network_output_size(net);
|
||||||
matrix pred = make_matrix(test.X.rows, k);
|
matrix pred = make_matrix(test.X.rows, k);
|
||||||
float *X = calloc(net.batch*test.X.rows, sizeof(float));
|
float* X = (float*)calloc(net.batch * test.X.rows, sizeof(float));
|
||||||
for(i = 0; i < test.X.rows; i += net.batch){
|
for(i = 0; i < test.X.rows; i += net.batch){
|
||||||
for(b = 0; b < net.batch; ++b){
|
for(b = 0; b < net.batch; ++b){
|
||||||
if(i+b == test.X.rows) break;
|
if(i+b == test.X.rows) break;
|
||||||
@ -814,7 +814,7 @@ matrix network_predict_data(network net, data test)
|
|||||||
int i,j,b;
|
int i,j,b;
|
||||||
int k = get_network_output_size(net);
|
int k = get_network_output_size(net);
|
||||||
matrix pred = make_matrix(test.X.rows, k);
|
matrix pred = make_matrix(test.X.rows, k);
|
||||||
float *X = calloc(net.batch*test.X.cols, sizeof(float));
|
float* X = (float*)calloc(net.batch * test.X.cols, sizeof(float));
|
||||||
for(i = 0; i < test.X.rows; i += net.batch){
|
for(i = 0; i < test.X.rows; i += net.batch){
|
||||||
for(b = 0; b < net.batch; ++b){
|
for(b = 0; b < net.batch; ++b){
|
||||||
if(i+b == test.X.rows) break;
|
if(i+b == test.X.rows) break;
|
||||||
|
@ -6,14 +6,14 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "layer.h"
|
#include "layer.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "data.h"
|
#include "data.h"
|
||||||
#include "tree.h"
|
#include "tree.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
typedef enum {
|
typedef enum {
|
||||||
CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
|
CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
//#include "cublas_v2.h"
|
//#include "cublas_v2.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
@ -35,10 +34,9 @@ extern "C" {
|
|||||||
#include "route_layer.h"
|
#include "route_layer.h"
|
||||||
#include "shortcut_layer.h"
|
#include "shortcut_layer.h"
|
||||||
#include "blas.h"
|
#include "blas.h"
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
@ -396,9 +394,11 @@ void sync_nets(network *nets, int n, int interval)
|
|||||||
float train_networks(network *nets, int n, data d, int interval)
|
float train_networks(network *nets, int n, data d, int interval)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
#ifdef _DEBUG
|
||||||
int batch = nets[0].batch;
|
int batch = nets[0].batch;
|
||||||
int subdivisions = nets[0].subdivisions;
|
int subdivisions = nets[0].subdivisions;
|
||||||
assert(batch * subdivisions * n == d.X.rows);
|
assert(batch * subdivisions * n == d.X.rows);
|
||||||
|
#endif
|
||||||
pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
|
pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
|
||||||
float *errors = (float *) calloc(n, sizeof(float));
|
float *errors = (float *) calloc(n, sizeof(float));
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui_c.h"
|
#include <opencv2/highgui/highgui_c.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2
|
// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
|
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
|
fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
|
||||||
layer layer = {0};
|
layer layer = { (LAYER_TYPE)0 };
|
||||||
layer.type = NORMALIZATION;
|
layer.type = NORMALIZATION;
|
||||||
layer.batch = batch;
|
layer.batch = batch;
|
||||||
layer.h = layer.out_h = h;
|
layer.h = layer.out_h = h;
|
||||||
@ -15,10 +15,10 @@ layer make_normalization_layer(int batch, int w, int h, int c, int size, float a
|
|||||||
layer.size = size;
|
layer.size = size;
|
||||||
layer.alpha = alpha;
|
layer.alpha = alpha;
|
||||||
layer.beta = beta;
|
layer.beta = beta;
|
||||||
layer.output = calloc(h * w * c * batch, sizeof(float));
|
layer.output = (float*)calloc(h * w * c * batch, sizeof(float));
|
||||||
layer.delta = calloc(h * w * c * batch, sizeof(float));
|
layer.delta = (float*)calloc(h * w * c * batch, sizeof(float));
|
||||||
layer.squared = calloc(h * w * c * batch, sizeof(float));
|
layer.squared = (float*)calloc(h * w * c * batch, sizeof(float));
|
||||||
layer.norms = calloc(h * w * c * batch, sizeof(float));
|
layer.norms = (float*)calloc(h * w * c * batch, sizeof(float));
|
||||||
layer.inputs = w*h*c;
|
layer.inputs = w*h*c;
|
||||||
layer.outputs = layer.inputs;
|
layer.outputs = layer.inputs;
|
||||||
|
|
||||||
@ -46,10 +46,10 @@ void resize_normalization_layer(layer *layer, int w, int h)
|
|||||||
layer->out_w = w;
|
layer->out_w = w;
|
||||||
layer->inputs = w*h*c;
|
layer->inputs = w*h*c;
|
||||||
layer->outputs = layer->inputs;
|
layer->outputs = layer->inputs;
|
||||||
layer->output = realloc(layer->output, h * w * c * batch * sizeof(float));
|
layer->output = (float*)realloc(layer->output, h * w * c * batch * sizeof(float));
|
||||||
layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float));
|
layer->delta = (float*)realloc(layer->delta, h * w * c * batch * sizeof(float));
|
||||||
layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float));
|
layer->squared = (float*)realloc(layer->squared, h * w * c * batch * sizeof(float));
|
||||||
layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float));
|
layer->norms = (float*)realloc(layer->norms, h * w * c * batch * sizeof(float));
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
cuda_free(layer->output_gpu);
|
cuda_free(layer->output_gpu);
|
||||||
cuda_free(layer->delta_gpu);
|
cuda_free(layer->delta_gpu);
|
||||||
|
@ -5,6 +5,9 @@
|
|||||||
#include "layer.h"
|
#include "layer.h"
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
|
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
|
||||||
void resize_normalization_layer(layer *layer, int h, int w);
|
void resize_normalization_layer(layer *layer, int h, int w);
|
||||||
void forward_normalization_layer(const layer layer, network_state state);
|
void forward_normalization_layer(const layer layer, network_state state);
|
||||||
@ -16,4 +19,7 @@ void forward_normalization_layer_gpu(const layer layer, network_state state);
|
|||||||
void backward_normalization_layer_gpu(const layer layer, network_state state);
|
void backward_normalization_layer_gpu(const layer layer, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "option_list.h"
|
#include "option_list.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "data.h"
|
||||||
|
|
||||||
list *read_data_cfg(char *filename)
|
list *read_data_cfg(char *filename)
|
||||||
{
|
{
|
||||||
@ -71,7 +72,7 @@ int read_option(char *s, list *options)
|
|||||||
|
|
||||||
void option_insert(list *l, char *key, char *val)
|
void option_insert(list *l, char *key, char *val)
|
||||||
{
|
{
|
||||||
kvp *p = malloc(sizeof(kvp));
|
kvp* p = (kvp*)malloc(sizeof(kvp));
|
||||||
p->key = key;
|
p->key = key;
|
||||||
p->val = val;
|
p->val = val;
|
||||||
p->used = 0;
|
p->used = 0;
|
||||||
|
@ -9,6 +9,9 @@ typedef struct{
|
|||||||
int used;
|
int used;
|
||||||
} kvp;
|
} kvp;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
list *read_data_cfg(char *filename);
|
list *read_data_cfg(char *filename);
|
||||||
int read_option(char *s, list *options);
|
int read_option(char *s, list *options);
|
||||||
@ -28,4 +31,7 @@ void option_unused(list *l);
|
|||||||
|
|
||||||
//LIB_API metadata get_metadata(char *file);
|
//LIB_API metadata get_metadata(char *file);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
26
src/parser.c
26
src/parser.c
@ -272,7 +272,7 @@ int *parse_yolo_mask(char *a, int *num)
|
|||||||
for (i = 0; i < len; ++i) {
|
for (i = 0; i < len; ++i) {
|
||||||
if (a[i] == ',') ++n;
|
if (a[i] == ',') ++n;
|
||||||
}
|
}
|
||||||
mask = calloc(n, sizeof(int));
|
mask = (int*)calloc(n, sizeof(int));
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
int val = atoi(a);
|
int val = atoi(a);
|
||||||
mask[i] = val;
|
mask[i] = val;
|
||||||
@ -587,8 +587,8 @@ route_layer parse_route(list *options, size_params params, network net)
|
|||||||
if (l[i] == ',') ++n;
|
if (l[i] == ',') ++n;
|
||||||
}
|
}
|
||||||
|
|
||||||
int *layers = calloc(n, sizeof(int));
|
int* layers = (int*)calloc(n, sizeof(int));
|
||||||
int *sizes = calloc(n, sizeof(int));
|
int* sizes = (int*)calloc(n, sizeof(int));
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
int index = atoi(l);
|
int index = atoi(l);
|
||||||
l = strchr(l, ',')+1;
|
l = strchr(l, ',')+1;
|
||||||
@ -693,8 +693,8 @@ void parse_net_options(list *options, network *net)
|
|||||||
for(i = 0; i < len; ++i){
|
for(i = 0; i < len; ++i){
|
||||||
if (l[i] == ',') ++n;
|
if (l[i] == ',') ++n;
|
||||||
}
|
}
|
||||||
int *steps = calloc(n, sizeof(int));
|
int* steps = (int*)calloc(n, sizeof(int));
|
||||||
float *scales = calloc(n, sizeof(float));
|
float* scales = (float*)calloc(n, sizeof(float));
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
int step = atoi(l);
|
int step = atoi(l);
|
||||||
float scale = atof(p);
|
float scale = atof(p);
|
||||||
@ -765,7 +765,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
|
|||||||
fprintf(stderr, "%4d ", count);
|
fprintf(stderr, "%4d ", count);
|
||||||
s = (section *)n->val;
|
s = (section *)n->val;
|
||||||
options = s->options;
|
options = s->options;
|
||||||
layer l = {0};
|
layer l = { (LAYER_TYPE)0 };
|
||||||
LAYER_TYPE lt = string_to_layer_type(s->type);
|
LAYER_TYPE lt = string_to_layer_type(s->type);
|
||||||
if(lt == CONVOLUTIONAL){
|
if(lt == CONVOLUTIONAL){
|
||||||
l = parse_convolutional(options, params);
|
l = parse_convolutional(options, params);
|
||||||
@ -864,7 +864,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
|
|||||||
if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
|
if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
|
||||||
else {
|
else {
|
||||||
cudaGetLastError(); // reset CUDA-error
|
cudaGetLastError(); // reset CUDA-error
|
||||||
net.input_pinned_cpu = calloc(size, sizeof(float));
|
net.input_pinned_cpu = (float*)calloc(size, sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
// pre-allocate memory for inference on Tensor Cores (fp16)
|
// pre-allocate memory for inference on Tensor Cores (fp16)
|
||||||
@ -879,12 +879,12 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
|
|||||||
net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
|
net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
net.workspace = calloc(1, workspace_size);
|
net.workspace = (float*)calloc(1, workspace_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (workspace_size) {
|
if (workspace_size) {
|
||||||
net.workspace = calloc(1, workspace_size);
|
net.workspace = (float*)calloc(1, workspace_size);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -911,7 +911,7 @@ list *read_cfg(char *filename)
|
|||||||
strip(line);
|
strip(line);
|
||||||
switch(line[0]){
|
switch(line[0]){
|
||||||
case '[':
|
case '[':
|
||||||
current = malloc(sizeof(section));
|
current = (section*)malloc(sizeof(section));
|
||||||
list_insert(sections, current);
|
list_insert(sections, current);
|
||||||
current->options = make_list();
|
current->options = make_list();
|
||||||
current->type = line;
|
current->type = line;
|
||||||
@ -1091,7 +1091,7 @@ void save_weights(network net, char *filename)
|
|||||||
|
|
||||||
void transpose_matrix(float *a, int rows, int cols)
|
void transpose_matrix(float *a, int rows, int cols)
|
||||||
{
|
{
|
||||||
float *transpose = calloc(rows*cols, sizeof(float));
|
float* transpose = (float*)calloc(rows * cols, sizeof(float));
|
||||||
int x, y;
|
int x, y;
|
||||||
for(x = 0; x < rows; ++x){
|
for(x = 0; x < rows; ++x){
|
||||||
for(y = 0; y < cols; ++y){
|
for(y = 0; y < cols; ++y){
|
||||||
@ -1313,7 +1313,7 @@ void load_weights(network *net, char *filename)
|
|||||||
network *load_network_custom(char *cfg, char *weights, int clear, int batch)
|
network *load_network_custom(char *cfg, char *weights, int clear, int batch)
|
||||||
{
|
{
|
||||||
printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
|
printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
|
||||||
network *net = calloc(1, sizeof(network));
|
network* net = (network*)calloc(1, sizeof(network));
|
||||||
*net = parse_network_cfg_custom(cfg, batch, 0);
|
*net = parse_network_cfg_custom(cfg, batch, 0);
|
||||||
if (weights && weights[0] != 0) {
|
if (weights && weights[0] != 0) {
|
||||||
load_weights(net, weights);
|
load_weights(net, weights);
|
||||||
@ -1326,7 +1326,7 @@ network *load_network_custom(char *cfg, char *weights, int clear, int batch)
|
|||||||
network *load_network(char *cfg, char *weights, int clear)
|
network *load_network(char *cfg, char *weights, int clear)
|
||||||
{
|
{
|
||||||
printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
|
printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
|
||||||
network *net = calloc(1, sizeof(network));
|
network* net = (network*)calloc(1, sizeof(network));
|
||||||
*net = parse_network_cfg(cfg);
|
*net = parse_network_cfg(cfg);
|
||||||
if (weights && weights[0] != 0) {
|
if (weights && weights[0] != 0) {
|
||||||
load_weights(net, weights);
|
load_weights(net, weights);
|
||||||
|
@ -2,6 +2,9 @@
|
|||||||
#define PARSER_H
|
#define PARSER_H
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
network parse_network_cfg(char *filename);
|
network parse_network_cfg(char *filename);
|
||||||
network parse_network_cfg_custom(char *filename, int batch, int time_steps);
|
network parse_network_cfg_custom(char *filename, int batch, int time_steps);
|
||||||
void save_network(network net, char *filename);
|
void save_network(network net, char *filename);
|
||||||
@ -11,4 +14,7 @@ void save_weights_double(network net, char *filename);
|
|||||||
void load_weights(network *net, char *filename);
|
void load_weights(network *net, char *filename);
|
||||||
void load_weights_upto(network *net, char *filename, int cutoff);
|
void load_weights_upto(network *net, char *filename, int cutoff);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -9,11 +9,10 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#define DOABS 1
|
|
||||||
|
|
||||||
region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes)
|
region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes)
|
||||||
{
|
{
|
||||||
region_layer l = {0};
|
region_layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = REGION;
|
l.type = REGION;
|
||||||
|
|
||||||
l.n = n;
|
l.n = n;
|
||||||
@ -22,15 +21,15 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int
|
|||||||
l.w = w;
|
l.w = w;
|
||||||
l.classes = classes;
|
l.classes = classes;
|
||||||
l.coords = coords;
|
l.coords = coords;
|
||||||
l.cost = calloc(1, sizeof(float));
|
l.cost = (float*)calloc(1, sizeof(float));
|
||||||
l.biases = calloc(n*2, sizeof(float));
|
l.biases = (float*)calloc(n * 2, sizeof(float));
|
||||||
l.bias_updates = calloc(n*2, sizeof(float));
|
l.bias_updates = (float*)calloc(n * 2, sizeof(float));
|
||||||
l.outputs = h*w*n*(classes + coords + 1);
|
l.outputs = h*w*n*(classes + coords + 1);
|
||||||
l.inputs = l.outputs;
|
l.inputs = l.outputs;
|
||||||
l.max_boxes = max_boxes;
|
l.max_boxes = max_boxes;
|
||||||
l.truths = max_boxes*(5);
|
l.truths = max_boxes*(5);
|
||||||
l.delta = calloc(batch*l.outputs, sizeof(float));
|
l.delta = (float*)calloc(batch * l.outputs, sizeof(float));
|
||||||
l.output = calloc(batch*l.outputs, sizeof(float));
|
l.output = (float*)calloc(batch * l.outputs, sizeof(float));
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < n*2; ++i){
|
for(i = 0; i < n*2; ++i){
|
||||||
l.biases[i] = .5;
|
l.biases[i] = .5;
|
||||||
@ -61,8 +60,8 @@ void resize_region_layer(layer *l, int w, int h)
|
|||||||
l->outputs = h*w*l->n*(l->classes + l->coords + 1);
|
l->outputs = h*w*l->n*(l->classes + l->coords + 1);
|
||||||
l->inputs = l->outputs;
|
l->inputs = l->outputs;
|
||||||
|
|
||||||
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
|
l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float));
|
||||||
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
|
l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float));
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if (old_w < w || old_h < h) {
|
if (old_w < w || old_h < h) {
|
||||||
@ -444,11 +443,11 @@ void forward_region_layer_gpu(const region_layer l, network_state state)
|
|||||||
softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5);
|
softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
|
float* in_cpu = (float*)calloc(l.batch * l.inputs, sizeof(float));
|
||||||
float *truth_cpu = 0;
|
float *truth_cpu = 0;
|
||||||
if(state.truth){
|
if(state.truth){
|
||||||
int num_truth = l.batch*l.truths;
|
int num_truth = l.batch*l.truths;
|
||||||
truth_cpu = calloc(num_truth, sizeof(float));
|
truth_cpu = (float*)calloc(num_truth, sizeof(float));
|
||||||
cuda_pull_array(state.truth, truth_cpu, num_truth);
|
cuda_pull_array(state.truth, truth_cpu, num_truth);
|
||||||
}
|
}
|
||||||
cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs);
|
cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs);
|
||||||
|
@ -6,6 +6,9 @@
|
|||||||
|
|
||||||
typedef layer region_layer;
|
typedef layer region_layer;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords, int max_boxes);
|
region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords, int max_boxes);
|
||||||
void forward_region_layer(const region_layer l, network_state state);
|
void forward_region_layer(const region_layer l, network_state state);
|
||||||
void backward_region_layer(const region_layer l, network_state state);
|
void backward_region_layer(const region_layer l, network_state state);
|
||||||
@ -20,4 +23,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state);
|
|||||||
void backward_region_layer_gpu(region_layer l, network_state state);
|
void backward_region_layer_gpu(region_layer l, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
|
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
|
||||||
{
|
{
|
||||||
layer l = {0};
|
layer l = { (LAYER_TYPE)0 };
|
||||||
l.type = REORG;
|
l.type = REORG;
|
||||||
l.batch = batch;
|
l.batch = batch;
|
||||||
l.stride = stride;
|
l.stride = stride;
|
||||||
@ -27,8 +27,8 @@ layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
|
|||||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||||
l.inputs = h*w*c;
|
l.inputs = h*w*c;
|
||||||
int output_size = l.out_h * l.out_w * l.out_c * batch;
|
int output_size = l.out_h * l.out_w * l.out_c * batch;
|
||||||
l.output = calloc(output_size, sizeof(float));
|
l.output = (float*)calloc(output_size, sizeof(float));
|
||||||
l.delta = calloc(output_size, sizeof(float));
|
l.delta = (float*)calloc(output_size, sizeof(float));
|
||||||
|
|
||||||
l.forward = forward_reorg_layer;
|
l.forward = forward_reorg_layer;
|
||||||
l.backward = backward_reorg_layer;
|
l.backward = backward_reorg_layer;
|
||||||
@ -64,8 +64,8 @@ void resize_reorg_layer(layer *l, int w, int h)
|
|||||||
l->inputs = l->outputs;
|
l->inputs = l->outputs;
|
||||||
int output_size = l->outputs * l->batch;
|
int output_size = l->outputs * l->batch;
|
||||||
|
|
||||||
l->output = realloc(l->output, output_size * sizeof(float));
|
l->output = (float*)realloc(l->output, output_size * sizeof(float));
|
||||||
l->delta = realloc(l->delta, output_size * sizeof(float));
|
l->delta = (float*)realloc(l->delta, output_size * sizeof(float));
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
cuda_free(l->output_gpu);
|
cuda_free(l->output_gpu);
|
||||||
|
@ -6,6 +6,9 @@
|
|||||||
#include "layer.h"
|
#include "layer.h"
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse);
|
layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse);
|
||||||
void resize_reorg_layer(layer *l, int w, int h);
|
void resize_reorg_layer(layer *l, int w, int h);
|
||||||
void forward_reorg_layer(const layer l, network_state state);
|
void forward_reorg_layer(const layer l, network_state state);
|
||||||
@ -16,5 +19,8 @@ void forward_reorg_layer_gpu(layer l, network_state state);
|
|||||||
void backward_reorg_layer_gpu(layer l, network_state state);
|
void backward_reorg_layer_gpu(layer l, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user