mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Compiles for OpenCV 3.1
This commit is contained in:
72
Makefile
72
Makefile
@@ -1,28 +1,45 @@
|
|||||||
GPU=0
|
#
|
||||||
CUDNN=0
|
# Default make builds both original darknet, and its CPP equivalent darknet-cpp
|
||||||
OPENCV=0
|
# make darknet - only darknet (original code)
|
||||||
DEBUG=0
|
# make darknet-cpp - only the CPP version
|
||||||
|
#
|
||||||
|
# CPP version supports OpenCV3. Tested on Ubuntu 16.04
|
||||||
|
#
|
||||||
|
# OPENCV=1 (C++ && CV3, or C && CV2 only - check with pkg-config --modversion opencv)
|
||||||
|
# When building CV3 and C version, will get errors like
|
||||||
|
# ./obj/image.o: In function `cvPointFrom32f':
|
||||||
|
# /usr/local/include/opencv2/core/types_c.h:929: undefined reference to `cvRound'
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
ARCH= -gencode arch=compute_20,code=[sm_20,sm_21] \
|
GPU=1
|
||||||
-gencode arch=compute_30,code=sm_30 \
|
CUDNN=1
|
||||||
-gencode arch=compute_35,code=sm_35 \
|
OPENCV=1
|
||||||
-gencode arch=compute_50,code=[sm_50,compute_50] \
|
DEBUG=1
|
||||||
-gencode arch=compute_52,code=[sm_52,compute_52]
|
|
||||||
|
|
||||||
# This is what I use, uncomment if you know your arch and want to specify
|
ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
|
||||||
# ARCH= -gencode arch=compute_52,code=compute_52
|
|
||||||
|
# C Definitions
|
||||||
|
|
||||||
VPATH=./src/
|
VPATH=./src/
|
||||||
EXEC=darknet
|
EXEC=darknet
|
||||||
OBJDIR=./obj/
|
OBJDIR=./obj/
|
||||||
|
|
||||||
CC=gcc
|
CC=gcc
|
||||||
NVCC=nvcc
|
|
||||||
|
# C++ Definitions
|
||||||
|
EXEC_CPP=darknet-cpp
|
||||||
|
OBJDIR_CPP=./obj-cpp/
|
||||||
|
CC_CPP=g++
|
||||||
|
CFLAGS_CPP=-Wno-write-strings
|
||||||
|
|
||||||
|
NVCC=nvcc -ccbin /usr/bin/g++
|
||||||
OPTS=-Ofast
|
OPTS=-Ofast
|
||||||
LDFLAGS= -lm -pthread
|
LDFLAGS= -lm -pthread
|
||||||
COMMON=
|
COMMON=
|
||||||
CFLAGS=-Wall -Wfatal-errors
|
CFLAGS=-Wall -Wfatal-errors
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(DEBUG), 1)
|
ifeq ($(DEBUG), 1)
|
||||||
OPTS=-O0 -g
|
OPTS=-O0 -g
|
||||||
endif
|
endif
|
||||||
@@ -36,6 +53,9 @@ LDFLAGS+= `pkg-config --libs opencv`
|
|||||||
COMMON+= `pkg-config --cflags opencv`
|
COMMON+= `pkg-config --cflags opencv`
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Place the IPP .a file from OpenCV here for easy linking
|
||||||
|
LDFLAGS += -L/usr/local/share/OpenCV/3rdparty/lib
|
||||||
|
|
||||||
ifeq ($(GPU), 1)
|
ifeq ($(GPU), 1)
|
||||||
COMMON+= -DGPU -I/usr/local/cuda/include/
|
COMMON+= -DGPU -I/usr/local/cuda/include/
|
||||||
CFLAGS+= -DGPU
|
CFLAGS+= -DGPU
|
||||||
@@ -57,21 +77,35 @@ endif
|
|||||||
OBJS = $(addprefix $(OBJDIR), $(OBJ))
|
OBJS = $(addprefix $(OBJDIR), $(OBJ))
|
||||||
DEPS = $(wildcard src/*.h) Makefile
|
DEPS = $(wildcard src/*.h) Makefile
|
||||||
|
|
||||||
all: obj backup results $(EXEC)
|
OBJS_CPP = $(addprefix $(OBJDIR_CPP), $(OBJ))
|
||||||
|
|
||||||
$(EXEC): $(OBJS)
|
all: obj obj-cpp results $(EXEC) $(EXEC_CPP)
|
||||||
$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS)
|
|
||||||
|
$(EXEC): obj clean $(OBJS)
|
||||||
|
$(CC) $(COMMON) $(CFLAGS) $(OBJS) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
$(OBJDIR)%.o: %.c $(DEPS)
|
$(OBJDIR)%.o: %.c $(DEPS)
|
||||||
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
|
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
$(EXEC_CPP): obj-cpp clean-cpp $(OBJS_CPP)
|
||||||
|
$(CC_CPP) $(COMMON) $(CFLAGS) $(OBJS_CPP) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
$(OBJDIR_CPP)%.o: %.c $(DEPS_CPP)
|
||||||
|
$(CC_CPP) $(COMMON) $(CFLAGS_CPP) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
|
||||||
$(OBJDIR)%.o: %.cu $(DEPS)
|
$(OBJDIR)%.o: %.cu $(DEPS)
|
||||||
$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
|
$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
|
||||||
|
|
||||||
|
$(OBJDIR_CPP)%.o: %.cu $(DEPS)
|
||||||
|
$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
|
||||||
|
|
||||||
|
|
||||||
obj:
|
obj:
|
||||||
mkdir -p obj
|
mkdir -p obj
|
||||||
backup:
|
obj-cpp:
|
||||||
mkdir -p backup
|
mkdir -p obj-cpp
|
||||||
|
|
||||||
results:
|
results:
|
||||||
mkdir -p results
|
mkdir -p results
|
||||||
|
|
||||||
@@ -79,4 +113,6 @@ results:
|
|||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf $(OBJS) $(EXEC)
|
rm -rf $(OBJS) $(EXEC)
|
||||||
|
clean-cpp:
|
||||||
|
rm -rf $(OBJS_CPP) $(EXEC_CPP)
|
||||||
|
|
||||||
|
|||||||
@@ -2,12 +2,17 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "activations.h"
|
#include "activations.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__device__ float lhtan_activate_kernel(float x)
|
__device__ float lhtan_activate_kernel(float x)
|
||||||
{
|
{
|
||||||
if(x < 0) return .001*x;
|
if(x < 0) return .001*x;
|
||||||
@@ -152,13 +157,13 @@ __global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delt
|
|||||||
if(i < n) delta[i] *= gradient_kernel(x[i], a);
|
if(i < n) delta[i] *= gradient_kernel(x[i], a);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a)
|
EXTERNC void activate_array_ongpu(float *x, int n, ACTIVATION a)
|
||||||
{
|
{
|
||||||
activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
|
activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta)
|
EXTERNC void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta)
|
||||||
{
|
{
|
||||||
gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta);
|
gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
|
|||||||
@@ -15,8 +15,14 @@ float gradient(float x, ACTIVATION a);
|
|||||||
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
|
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
|
||||||
void activate_array(float *x, const int n, const ACTIVATION a);
|
void activate_array(float *x, const int n, const ACTIVATION a);
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void activate_array_ongpu(float *x, int n, ACTIVATION a);
|
void activate_array_ongpu(float *x, int n, ACTIVATION a);
|
||||||
void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta);
|
void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline float stair_activate(float x)
|
static inline float stair_activate(float x)
|
||||||
|
|||||||
@@ -15,8 +15,14 @@ void forward_avgpool_layer(const avgpool_layer l, network_state state);
|
|||||||
void backward_avgpool_layer(const avgpool_layer l, network_state state);
|
void backward_avgpool_layer(const avgpool_layer l, network_state state);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void forward_avgpool_layer_gpu(avgpool_layer l, network_state state);
|
void forward_avgpool_layer_gpu(avgpool_layer l, network_state state);
|
||||||
void backward_avgpool_layer_gpu(avgpool_layer l, network_state state);
|
void backward_avgpool_layer_gpu(avgpool_layer l, network_state state);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "avgpool_layer.h"
|
#include "avgpool_layer.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
}
|
||||||
@@ -43,7 +49,7 @@ __global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state)
|
EXTERNC void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
size_t n = layer.c*layer.batch;
|
size_t n = layer.c*layer.batch;
|
||||||
|
|
||||||
@@ -51,7 +57,7 @@ extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state sta
|
|||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state)
|
EXTERNC void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
size_t n = layer.c*layer.batch;
|
size_t n = layer.c*layer.batch;
|
||||||
|
|
||||||
|
|||||||
@@ -10,8 +10,14 @@ void forward_batchnorm_layer(layer l, network_state state);
|
|||||||
void backward_batchnorm_layer(layer l, network_state state);
|
void backward_batchnorm_layer(layer l, network_state state);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void forward_batchnorm_layer_gpu(layer l, network_state state);
|
void forward_batchnorm_layer_gpu(layer l, network_state state);
|
||||||
void backward_batchnorm_layer_gpu(layer l, network_state state);
|
void backward_batchnorm_layer_gpu(layer l, network_state state);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
void pull_batchnorm_layer(layer l);
|
void pull_batchnorm_layer(layer l);
|
||||||
void push_batchnorm_layer(layer l);
|
void push_batchnorm_layer(layer l);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
23
src/blas.h
23
src/blas.h
@@ -1,5 +1,9 @@
|
|||||||
#ifndef BLAS_H
|
#ifndef BLAS_H
|
||||||
#define BLAS_H
|
#define BLAS_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void flatten(float *x, int size, int layers, int batch, int forward);
|
void flatten(float *x, int size, int layers, int batch, int forward);
|
||||||
void pm(int M, int N, float *A);
|
void pm(int M, int N, float *A);
|
||||||
float *random_matrix(int rows, int cols);
|
float *random_matrix(int rows, int cols);
|
||||||
@@ -36,10 +40,16 @@ void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
|||||||
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
|
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
|
||||||
|
|
||||||
void softmax(float *input, int n, float temp, float *output);
|
void softmax(float *input, int n, float temp, float *output);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
|
void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
|
||||||
void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
|
void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
|
||||||
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
|
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
|
||||||
@@ -67,9 +77,17 @@ void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2,
|
|||||||
void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
|
void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
|
||||||
void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
|
void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
|
||||||
void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
|
void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
|
void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
|
||||||
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
|
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
|
void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||||
void l2_gpu(int n, float *pred, float *truth, float *delta, float *error);
|
void l2_gpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||||
void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc);
|
void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc);
|
||||||
@@ -84,4 +102,9 @@ void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rat
|
|||||||
void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out);
|
void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -3,7 +3,13 @@
|
|||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "blas.h"
|
#include "blas.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
@@ -149,7 +155,7 @@ __global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float
|
|||||||
//if(index == 0) printf("%f %f %f %f\n", m[index], v[index], (rate * sqrt(1.-pow(B2, t)) / (1.-pow(B1, t)) * m[index] / (sqrt(v[index]) + eps)));
|
//if(index == 0) printf("%f %f %f %f\n", m[index], v[index], (rate * sqrt(1.-pow(B2, t)) / (1.-pow(B1, t)) * m[index] / (sqrt(v[index]) + eps)));
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
|
EXTERNC void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
|
||||||
{
|
{
|
||||||
adam_kernel<<<cuda_gridsize(n), BLOCK>>>(n, x, m, v, B1, B2, rate, eps, t);
|
adam_kernel<<<cuda_gridsize(n), BLOCK>>>(n, x, m, v, B1, B2, rate, eps, t);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -173,7 +179,7 @@ __global__ void normalize_delta_kernel(int N, float *x, float *mean, float *vari
|
|||||||
delta[index] = delta[index] * 1./(sqrt(variance[f]) + .000001f) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
|
delta[index] = delta[index] * 1./(sqrt(variance[f]) + .000001f) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
|
EXTERNC void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
|
||||||
{
|
{
|
||||||
size_t N = batch*filters*spatial;
|
size_t N = batch*filters*spatial;
|
||||||
normalize_delta_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta);
|
normalize_delta_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta);
|
||||||
@@ -277,19 +283,19 @@ __global__ void mean_delta_kernel(float *delta, float *variance, int batch, int
|
|||||||
mean_delta[i] *= (-1./sqrt(variance[i] + .000001f));
|
mean_delta[i] *= (-1./sqrt(variance[i] + .000001f));
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
|
EXTERNC void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
|
||||||
{
|
{
|
||||||
mean_delta_kernel<<<cuda_gridsize(filters), BLOCK>>>(delta, variance, batch, filters, spatial, mean_delta);
|
mean_delta_kernel<<<cuda_gridsize(filters), BLOCK>>>(delta, variance, batch, filters, spatial, mean_delta);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
|
EXTERNC void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
|
||||||
{
|
{
|
||||||
fast_mean_delta_kernel<<<filters, BLOCK>>>(delta, variance, batch, filters, spatial, mean_delta);
|
fast_mean_delta_kernel<<<filters, BLOCK>>>(delta, variance, batch, filters, spatial, mean_delta);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
|
EXTERNC void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
|
||||||
{
|
{
|
||||||
fast_variance_delta_kernel<<<filters, BLOCK>>>(x, delta, mean, variance, batch, filters, spatial, variance_delta);
|
fast_variance_delta_kernel<<<filters, BLOCK>>>(x, delta, mean, variance, batch, filters, spatial, variance_delta);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -422,7 +428,7 @@ __global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
|
EXTERNC void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
|
||||||
{
|
{
|
||||||
size_t N = batch*filters*spatial;
|
size_t N = batch*filters*spatial;
|
||||||
normalize_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, batch, filters, spatial);
|
normalize_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, batch, filters, spatial);
|
||||||
@@ -484,60 +490,60 @@ __global__ void fast_variance_kernel(float *x, float *mean, int batch, int filt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
|
EXTERNC void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
|
||||||
{
|
{
|
||||||
fast_mean_kernel<<<filters, BLOCK>>>(x, batch, filters, spatial, mean);
|
fast_mean_kernel<<<filters, BLOCK>>>(x, batch, filters, spatial, mean);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
|
EXTERNC void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
|
||||||
{
|
{
|
||||||
fast_variance_kernel<<<filters, BLOCK>>>(x, mean, batch, filters, spatial, variance);
|
fast_variance_kernel<<<filters, BLOCK>>>(x, mean, batch, filters, spatial, variance);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
|
EXTERNC void mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
|
||||||
{
|
{
|
||||||
mean_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, batch, filters, spatial, mean);
|
mean_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, batch, filters, spatial, mean);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
|
EXTERNC void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
|
||||||
{
|
{
|
||||||
variance_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, mean, batch, filters, spatial, variance);
|
variance_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, mean, batch, filters, spatial, variance);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
|
EXTERNC void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
|
||||||
{
|
{
|
||||||
axpy_ongpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY);
|
axpy_ongpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void pow_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
|
EXTERNC void pow_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
|
||||||
{
|
{
|
||||||
pow_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX, Y, INCY);
|
pow_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX, Y, INCY);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
|
EXTERNC void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
|
||||||
{
|
{
|
||||||
axpy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY);
|
axpy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY)
|
EXTERNC void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY)
|
||||||
{
|
{
|
||||||
copy_ongpu_offset(N, X, 0, INCX, Y, 0, INCY);
|
copy_ongpu_offset(N, X, 0, INCX, Y, 0, INCY);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void mul_ongpu(int N, float * X, int INCX, float * Y, int INCY)
|
EXTERNC void mul_ongpu(int N, float * X, int INCX, float * Y, int INCY)
|
||||||
{
|
{
|
||||||
mul_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, INCX, Y, INCY);
|
mul_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, INCX, Y, INCY);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
|
EXTERNC void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
|
||||||
{
|
{
|
||||||
copy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, OFFX, INCX, Y, OFFY, INCY);
|
copy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, OFFX, INCX, Y, OFFY, INCY);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -560,52 +566,52 @@ __global__ void flatten_kernel(int N, float *x, int spatial, int layers, int bat
|
|||||||
else out[i1] = x[i2];
|
else out[i1] = x[i2];
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out)
|
EXTERNC void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out)
|
||||||
{
|
{
|
||||||
int size = spatial*batch*layers;
|
int size = spatial*batch*layers;
|
||||||
flatten_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, spatial, layers, batch, forward, out);
|
flatten_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, spatial, layers, batch, forward, out);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
|
EXTERNC void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
|
||||||
{
|
{
|
||||||
int size = w*h*c*batch;
|
int size = w*h*c*batch;
|
||||||
reorg_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, w, h, c, batch, stride, forward, out);
|
reorg_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, w, h, c, batch, stride, forward, out);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void mask_ongpu(int N, float * X, float mask_num, float * mask)
|
EXTERNC void mask_ongpu(int N, float * X, float mask_num, float * mask)
|
||||||
{
|
{
|
||||||
mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask);
|
mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void const_ongpu(int N, float ALPHA, float * X, int INCX)
|
EXTERNC void const_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||||
{
|
{
|
||||||
const_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
const_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void constrain_ongpu(int N, float ALPHA, float * X, int INCX)
|
EXTERNC void constrain_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||||
{
|
{
|
||||||
constrain_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
constrain_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX)
|
EXTERNC void scal_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||||
{
|
{
|
||||||
scal_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
scal_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void supp_ongpu(int N, float ALPHA, float * X, int INCX)
|
EXTERNC void supp_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||||
{
|
{
|
||||||
supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX)
|
EXTERNC void fill_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||||
{
|
{
|
||||||
fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -628,7 +634,7 @@ __global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stri
|
|||||||
out[out_index] += add[add_index];
|
out[out_index] += add[add_index];
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
|
EXTERNC void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
|
||||||
{
|
{
|
||||||
int minw = (w1 < w2) ? w1 : w2;
|
int minw = (w1 < w2) ? w1 : w2;
|
||||||
int minh = (h1 < h2) ? h1 : h2;
|
int minh = (h1 < h2) ? h1 : h2;
|
||||||
@@ -663,7 +669,7 @@ __global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
|
EXTERNC void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||||
{
|
{
|
||||||
smooth_l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
|
smooth_l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -679,7 +685,7 @@ __global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error)
|
EXTERNC void l2_gpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||||
{
|
{
|
||||||
l2_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
|
l2_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -695,7 +701,7 @@ __global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c)
|
EXTERNC void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c)
|
||||||
{
|
{
|
||||||
weighted_sum_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, c);
|
weighted_sum_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, c);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -711,7 +717,7 @@ __global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)
|
EXTERNC void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)
|
||||||
{
|
{
|
||||||
weighted_delta_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, da, db, ds, dc);
|
weighted_delta_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, da, db, ds, dc);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -725,7 +731,7 @@ __global__ void mult_add_into_kernel(int n, float *a, float *b, float *c)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c)
|
EXTERNC void mult_add_into_gpu(int num, float *a, float *b, float *c)
|
||||||
{
|
{
|
||||||
mult_add_into_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, c);
|
mult_add_into_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, c);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
@@ -758,7 +764,7 @@ __global__ void softmax_kernel(int n, int offset, int batch, float *input, float
|
|||||||
softmax_device(n, input + b*offset, temp, output + b*offset);
|
softmax_device(n, input + b*offset, temp, output + b*offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output)
|
EXTERNC void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output)
|
||||||
{
|
{
|
||||||
int inputs = n;
|
int inputs = n;
|
||||||
int batch = groups;
|
int batch = groups;
|
||||||
|
|||||||
@@ -6,8 +6,14 @@ void col2im_cpu(float* data_col,
|
|||||||
int ksize, int stride, int pad, float* data_im);
|
int ksize, int stride, int pad, float* data_im);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void col2im_ongpu(float *data_col,
|
void col2im_ongpu(float *data_col,
|
||||||
int channels, int height, int width,
|
int channels, int height, int width,
|
||||||
int ksize, int stride, int pad, float *data_im);
|
int ksize, int stride, int pad, float *data_im);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "col2im.h"
|
#include "col2im.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "convolutional_layer.h"
|
#include "convolutional_layer.h"
|
||||||
#include "batchnorm_layer.h"
|
#include "batchnorm_layer.h"
|
||||||
#include "gemm.h"
|
#include "gemm.h"
|
||||||
|
|||||||
@@ -10,6 +10,9 @@
|
|||||||
typedef layer convolutional_layer;
|
typedef layer convolutional_layer;
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
||||||
void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
||||||
void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay);
|
void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay);
|
||||||
@@ -17,6 +20,10 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float
|
|||||||
void push_convolutional_layer(convolutional_layer layer);
|
void push_convolutional_layer(convolutional_layer layer);
|
||||||
void pull_convolutional_layer(convolutional_layer layer);
|
void pull_convolutional_layer(convolutional_layer layer);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
|
void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
|
||||||
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
|
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
@@ -31,7 +38,13 @@ void forward_convolutional_layer(const convolutional_layer layer, network_state
|
|||||||
void update_convolutional_layer(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay);
|
void update_convolutional_layer(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay);
|
||||||
image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights);
|
image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights);
|
||||||
void binarize_weights(float *weights, int n, int size, float *binary);
|
void binarize_weights(float *weights, int n, int size, float *binary);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void swap_binary(convolutional_layer *l);
|
void swap_binary(convolutional_layer *l);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
void binarize_weights2(float *weights, int n, int size, char *binary, float *scales);
|
void binarize_weights2(float *weights, int n, int size, char *binary, float *scales);
|
||||||
|
|
||||||
void backward_convolutional_layer(convolutional_layer layer, network_state state);
|
void backward_convolutional_layer(convolutional_layer layer, network_state state);
|
||||||
|
|||||||
@@ -13,7 +13,13 @@ void forward_crop_layer(const crop_layer l, network_state state);
|
|||||||
void resize_crop_layer(layer *l, int w, int h);
|
void resize_crop_layer(layer *l, int w, int h);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void forward_crop_layer_gpu(crop_layer l, network_state state);
|
void forward_crop_layer_gpu(crop_layer l, network_state state);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "crop_layer.h"
|
#include "crop_layer.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
@@ -180,7 +186,7 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i
|
|||||||
output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k);
|
output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
|
EXTERNC void forward_crop_layer_gpu(crop_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
cuda_random(layer.rand_gpu, layer.batch*8);
|
cuda_random(layer.rand_gpu, layer.batch*8);
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,12 @@ extern int gpu_index;
|
|||||||
#include "cudnn.h"
|
#include "cudnn.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
void check_error(cudaError_t status);
|
void check_error(cudaError_t status);
|
||||||
|
void check_cublas_error(cublasStatus_t status);
|
||||||
cublasHandle_t blas_handle();
|
cublasHandle_t blas_handle();
|
||||||
float *cuda_make_array(float *x, size_t n);
|
float *cuda_make_array(float *x, size_t n);
|
||||||
int *cuda_make_int_array(size_t n);
|
int *cuda_make_int_array(size_t n);
|
||||||
@@ -31,5 +36,9 @@ dim3 cuda_gridsize(size_t n);
|
|||||||
cudnnHandle_t cudnn_handle();
|
cudnnHandle_t cudnn_handle();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -94,7 +94,13 @@ data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int
|
|||||||
list *get_paths(char *filename);
|
list *get_paths(char *filename);
|
||||||
char **get_labels(char *filename);
|
char **get_labels(char *filename);
|
||||||
void get_random_batch(data d, int n, float *X, float *y);
|
void get_random_batch(data d, int n, float *X, float *y);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
data get_data_part(data d, int part, int total);
|
data get_data_part(data d, int part, int total);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
data get_random_data(data d, int num);
|
data get_random_data(data d, int num);
|
||||||
void get_next_batch(data d, int n, int offset, float *X, float *y);
|
void get_next_batch(data d, int n, int offset, float *X, float *y);
|
||||||
data load_categorical_data_csv(char *filename, int target, int k);
|
data load_categorical_data_csv(char *filename, int target, int k);
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "convolutional_layer.h"
|
#include "convolutional_layer.h"
|
||||||
#include "deconvolutional_layer.h"
|
#include "deconvolutional_layer.h"
|
||||||
#include "gemm.h"
|
#include "gemm.h"
|
||||||
@@ -13,7 +19,7 @@ extern "C" {
|
|||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
EXTERNC void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int out_h = deconvolutional_out_height(layer);
|
int out_h = deconvolutional_out_height(layer);
|
||||||
@@ -39,7 +45,7 @@ extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, n
|
|||||||
activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation);
|
activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
EXTERNC void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
float alpha = 1./layer.batch;
|
float alpha = 1./layer.batch;
|
||||||
int out_h = deconvolutional_out_height(layer);
|
int out_h = deconvolutional_out_height(layer);
|
||||||
@@ -79,7 +85,7 @@ extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void pull_deconvolutional_layer(deconvolutional_layer layer)
|
EXTERNC void pull_deconvolutional_layer(deconvolutional_layer layer)
|
||||||
{
|
{
|
||||||
cuda_pull_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size);
|
cuda_pull_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size);
|
||||||
cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
|
cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
|
||||||
@@ -87,7 +93,7 @@ extern "C" void pull_deconvolutional_layer(deconvolutional_layer layer)
|
|||||||
cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
|
EXTERNC void push_deconvolutional_layer(deconvolutional_layer layer)
|
||||||
{
|
{
|
||||||
cuda_push_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size);
|
cuda_push_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size);
|
||||||
cuda_push_array(layer.biases_gpu, layer.biases, layer.n);
|
cuda_push_array(layer.biases_gpu, layer.biases, layer.n);
|
||||||
@@ -95,7 +101,7 @@ extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
|
|||||||
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
|
EXTERNC void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
int size = layer.size*layer.size*layer.c*layer.n;
|
int size = layer.size*layer.size*layer.c*layer.n;
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,14 @@ void backward_dropout_layer(dropout_layer l, network_state state);
|
|||||||
void resize_dropout_layer(dropout_layer *l, int inputs);
|
void resize_dropout_layer(dropout_layer *l, int inputs);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void forward_dropout_layer_gpu(dropout_layer l, network_state state);
|
void forward_dropout_layer_gpu(dropout_layer l, network_state state);
|
||||||
void backward_dropout_layer_gpu(dropout_layer l, network_state state);
|
void backward_dropout_layer_gpu(dropout_layer l, network_state state);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "dropout_layer.h"
|
#include "dropout_layer.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
float *C, int ldc);
|
float *C, int ldc);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifndef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||||
float *A_gpu, int lda,
|
float *A_gpu, int lda,
|
||||||
float *B_gpu, int ldb,
|
float *B_gpu, int ldb,
|
||||||
@@ -30,5 +33,8 @@ void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
float *B, int ldb,
|
float *B, int ldb,
|
||||||
float BETA,
|
float BETA,
|
||||||
float *C, int ldc);
|
float *C, int ldc);
|
||||||
|
#ifndef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -7,9 +7,15 @@ void im2col_cpu(float* data_im,
|
|||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void im2col_ongpu(float *im,
|
void im2col_ongpu(float *im,
|
||||||
int channels, int height, int width,
|
int channels, int height, int width,
|
||||||
int ksize, int stride, int pad,float *data_col);
|
int ksize, int stride, int pad,float *data_col);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "im2col.h"
|
#include "im2col.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,8 +15,14 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state);
|
|||||||
void backward_maxpool_layer(const maxpool_layer l, network_state state);
|
void backward_maxpool_layer(const maxpool_layer l, network_state state);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
void forward_maxpool_layer_gpu(maxpool_layer l, network_state state);
|
void forward_maxpool_layer_gpu(maxpool_layer l, network_state state);
|
||||||
void backward_maxpool_layer_gpu(maxpool_layer l, network_state state);
|
void backward_maxpool_layer_gpu(maxpool_layer l, network_state state);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "maxpool_layer.h"
|
#include "maxpool_layer.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
}
|
||||||
@@ -84,7 +90,7 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
|
|||||||
prev_delta[index] += d;
|
prev_delta[index] += d;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
EXTERNC void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int h = layer.out_h;
|
int h = layer.out_h;
|
||||||
int w = layer.out_w;
|
int w = layer.out_w;
|
||||||
@@ -96,7 +102,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta
|
|||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
EXTERNC void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
size_t n = layer.h*layer.w*layer.c*layer.batch;
|
size_t n = layer.h*layer.w*layer.c*layer.batch;
|
||||||
|
|
||||||
|
|||||||
@@ -72,6 +72,9 @@ typedef struct network_state {
|
|||||||
} network_state;
|
} network_state;
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
float train_networks(network *nets, int n, data d, int interval);
|
float train_networks(network *nets, int n, data d, int interval);
|
||||||
void sync_nets(network *nets, int n, int interval);
|
void sync_nets(network *nets, int n, int interval);
|
||||||
float train_network_datum_gpu(network net, float *x, float *y);
|
float train_network_datum_gpu(network net, float *x, float *y);
|
||||||
@@ -82,23 +85,38 @@ float *get_network_output_gpu(network net);
|
|||||||
void forward_network_gpu(network net, network_state state);
|
void forward_network_gpu(network net, network_state state);
|
||||||
void backward_network_gpu(network net, network_state state);
|
void backward_network_gpu(network net, network_state state);
|
||||||
void update_network_gpu(network net);
|
void update_network_gpu(network net);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
float get_current_rate(network net);
|
float get_current_rate(network net);
|
||||||
int get_current_batch(network net);
|
int get_current_batch(network net);
|
||||||
void free_network(network net);
|
void free_network(network net);
|
||||||
void compare_networks(network n1, network n2, data d);
|
void compare_networks(network n1, network n2, data d);
|
||||||
char *get_layer_string(LAYER_TYPE a);
|
char *get_layer_string(LAYER_TYPE a);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
network make_network(int n);
|
network make_network(int n);
|
||||||
void forward_network(network net, network_state state);
|
void forward_network(network net, network_state state);
|
||||||
void backward_network(network net, network_state state);
|
void backward_network(network net, network_state state);
|
||||||
void update_network(network net);
|
void update_network(network net);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
float train_network(network net, data d);
|
float train_network(network net, data d);
|
||||||
float train_network_batch(network net, data d, int n);
|
float train_network_batch(network net, data d, int n);
|
||||||
float train_network_sgd(network net, data d, int n);
|
float train_network_sgd(network net, data d, int n);
|
||||||
float train_network_datum(network net, float *x, float *y);
|
float train_network_datum(network net, float *x, float *y);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
matrix network_predict_data(network net, data test);
|
matrix network_predict_data(network net, data test);
|
||||||
float *network_predict(network net, float *input);
|
float *network_predict(network net, float *input);
|
||||||
@@ -111,7 +129,6 @@ float *get_network_output_layer(network net, int i);
|
|||||||
float *get_network_delta_layer(network net, int i);
|
float *get_network_delta_layer(network net, int i);
|
||||||
float *get_network_delta(network net);
|
float *get_network_delta(network net);
|
||||||
int get_network_output_size_layer(network net, int i);
|
int get_network_output_size_layer(network net, int i);
|
||||||
int get_network_output_size(network net);
|
|
||||||
image get_network_image(network net);
|
image get_network_image(network net);
|
||||||
image get_network_image_layer(network net, int i);
|
image get_network_image_layer(network net, int i);
|
||||||
int get_predicted_class_network(network net);
|
int get_predicted_class_network(network net);
|
||||||
@@ -119,8 +136,15 @@ void print_network(network net);
|
|||||||
void visualize_network(network net);
|
void visualize_network(network net);
|
||||||
int resize_network(network *net, int w, int h);
|
int resize_network(network *net, int w, int h);
|
||||||
void set_batch_network(network *net, int b);
|
void set_batch_network(network *net, int b);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
int get_network_output_size(network net);
|
||||||
int get_network_input_size(network net);
|
int get_network_input_size(network net);
|
||||||
float get_network_cost(network net);
|
float get_network_cost(network net);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int get_network_nuisance(network net);
|
int get_network_nuisance(network net);
|
||||||
int get_network_background(network net);
|
int get_network_background(network net);
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|||||||
@@ -402,7 +402,7 @@ enum
|
|||||||
|
|
||||||
typedef unsigned char stbi_uc;
|
typedef unsigned char stbi_uc;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifndef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -508,7 +508,7 @@ STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int
|
|||||||
STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
|
STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifndef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ CREDITS:
|
|||||||
#ifndef INCLUDE_STB_IMAGE_WRITE_H
|
#ifndef INCLUDE_STB_IMAGE_WRITE_H
|
||||||
#define INCLUDE_STB_IMAGE_WRITE_H
|
#define INCLUDE_STB_IMAGE_WRITE_H
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifndef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -89,7 +89,7 @@ extern int stbi_write_bmp(char const *filename, int w, int h, int comp, const vo
|
|||||||
extern int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
|
extern int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
|
||||||
extern int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
|
extern int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifndef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,13 @@
|
|||||||
#include "curand.h"
|
#include "curand.h"
|
||||||
#include "cublas_v2.h"
|
#include "cublas_v2.h"
|
||||||
|
|
||||||
extern "C" {
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EXTERNC {
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
#include "detection_layer.h"
|
#include "detection_layer.h"
|
||||||
#include "cost_layer.h"
|
#include "cost_layer.h"
|
||||||
@@ -16,11 +22,11 @@ extern "C" {
|
|||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
#include "opencv2/highgui/highgui.hpp"
|
#include "opencv2/highgui/highgui.hpp"
|
||||||
#include "opencv2/imgproc/imgproc.hpp"
|
#include "opencv2/imgproc/imgproc.hpp"
|
||||||
extern "C" image ipl_to_image(IplImage* src);
|
EXTERNC image ipl_to_image(IplImage* src);
|
||||||
extern "C" void convert_yolo_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
EXTERNC void convert_yolo_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
||||||
|
|
||||||
extern "C" char *voc_names[];
|
EXTERNC char *voc_names[];
|
||||||
extern "C" image voc_labels[];
|
EXTERNC image voc_labels[];
|
||||||
|
|
||||||
static float **probs;
|
static float **probs;
|
||||||
static box *boxes;
|
static box *boxes;
|
||||||
@@ -63,7 +69,7 @@ void *detect_in_thread(void *ptr)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index)
|
EXTERNC void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index)
|
||||||
{
|
{
|
||||||
demo_thresh = thresh;
|
demo_thresh = thresh;
|
||||||
printf("YOLO demo\n");
|
printf("YOLO demo\n");
|
||||||
@@ -125,7 +131,7 @@ extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index){
|
EXTERNC void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index){
|
||||||
fprintf(stderr, "YOLO demo needs OpenCV for webcam images.\n");
|
fprintf(stderr, "YOLO demo needs OpenCV for webcam images.\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user