mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
🐍 🐍 🐍 🐍 🐍
This commit is contained in:
parent
d8c5cfd6c6
commit
f9446acb68
18
Makefile
18
Makefile
@ -13,14 +13,15 @@ ARCH= -gencode arch=compute_20,code=[sm_20,sm_21] \
|
|||||||
# ARCH= -gencode arch=compute_52,code=compute_52
|
# ARCH= -gencode arch=compute_52,code=compute_52
|
||||||
|
|
||||||
VPATH=./src/:./examples
|
VPATH=./src/:./examples
|
||||||
LIB=libdarknet.so
|
SLIB=libdarknet.so
|
||||||
|
ALIB=libdarknet.a
|
||||||
EXEC=darknet
|
EXEC=darknet
|
||||||
OBJDIR=./obj/
|
OBJDIR=./obj/
|
||||||
|
|
||||||
CC=gcc
|
CC=gcc
|
||||||
NVCC=nvcc --compiler-options '-fPIC'
|
NVCC=nvcc --compiler-options '-fPIC'
|
||||||
AR=ar
|
AR=ar
|
||||||
ARFLAGS=-rv
|
ARFLAGS=rcs
|
||||||
OPTS=-Ofast
|
OPTS=-Ofast
|
||||||
LDFLAGS= -lm -pthread
|
LDFLAGS= -lm -pthread
|
||||||
COMMON= -Iinclude/ -Isrc/
|
COMMON= -Iinclude/ -Isrc/
|
||||||
@ -62,13 +63,16 @@ EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA))
|
|||||||
OBJS = $(addprefix $(OBJDIR), $(OBJ))
|
OBJS = $(addprefix $(OBJDIR), $(OBJ))
|
||||||
DEPS = $(wildcard src/*.h) Makefile include/darknet.h
|
DEPS = $(wildcard src/*.h) Makefile include/darknet.h
|
||||||
|
|
||||||
all: obj backup results $(LIB) $(EXEC)
|
all: obj backup results $(SLIB) $(ALIB) $(EXEC)
|
||||||
|
|
||||||
|
|
||||||
$(EXEC): $(EXECOBJ) $(LIB)
|
$(EXEC): $(EXECOBJ) $(ALIB)
|
||||||
$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(LIB)
|
$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB)
|
||||||
|
|
||||||
$(LIB): $(OBJS)
|
$(ALIB): $(OBJS)
|
||||||
|
$(AR) $(ARFLAGS) $@ $^
|
||||||
|
|
||||||
|
$(SLIB): $(OBJS)
|
||||||
$(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS)
|
$(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
$(OBJDIR)%.o: %.c $(DEPS)
|
$(OBJDIR)%.o: %.c $(DEPS)
|
||||||
@ -87,5 +91,5 @@ results:
|
|||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf $(OBJS) $(LIB) $(EXEC) $(EXECOBJ)
|
rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ)
|
||||||
|
|
||||||
|
@ -53,21 +53,24 @@ void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size,
|
|||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void add_bias_kernel(float *output, float *biases, int n, int size)
|
__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size)
|
||||||
{
|
{
|
||||||
int offset = blockIdx.x * blockDim.x + threadIdx.x;
|
int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||||
int filter = blockIdx.y;
|
if (index >= n*size*batch) return;
|
||||||
int batch = blockIdx.z;
|
int i = index % size;
|
||||||
|
index /= size;
|
||||||
|
int j = index % n;
|
||||||
|
index /= n;
|
||||||
|
int k = index;
|
||||||
|
|
||||||
if(offset < size) output[(batch*n+filter)*size + offset] += biases[filter];
|
output[(k*n+j)*size + i] += biases[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_bias_gpu(float *output, float *biases, int batch, int n, int size)
|
void add_bias_gpu(float *output, float *biases, int batch, int n, int size)
|
||||||
{
|
{
|
||||||
dim3 dimGrid((size-1)/BLOCK + 1, n, batch);
|
int num = n*size*batch;
|
||||||
dim3 dimBlock(BLOCK, 1, 1);
|
|
||||||
|
|
||||||
add_bias_kernel<<<dimGrid, dimBlock>>>(output, biases, n, size);
|
add_bias_kernel<<<cuda_gridsize(num), BLOCK>>>(output, biases, batch, n, size);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ int cuda_get_device()
|
|||||||
|
|
||||||
void check_error(cudaError_t status)
|
void check_error(cudaError_t status)
|
||||||
{
|
{
|
||||||
cudaDeviceSynchronize();
|
//cudaDeviceSynchronize();
|
||||||
cudaError_t status2 = cudaGetLastError();
|
cudaError_t status2 = cudaGetLastError();
|
||||||
if (status != cudaSuccess)
|
if (status != cudaSuccess)
|
||||||
{
|
{
|
||||||
|
@ -68,15 +68,6 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
|
|||||||
*(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize);
|
*(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize);
|
||||||
l.wh->batch = batch;
|
l.wh->batch = batch;
|
||||||
|
|
||||||
#ifdef CUDNN
|
|
||||||
cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w);
|
|
||||||
cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w);
|
|
||||||
cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w);
|
|
||||||
cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w);
|
|
||||||
cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w);
|
|
||||||
cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
l.batch_normalize = batch_normalize;
|
l.batch_normalize = batch_normalize;
|
||||||
|
|
||||||
|
|
||||||
@ -110,6 +101,15 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
|
|||||||
l.r_gpu = cuda_make_array(0, batch*outputs);
|
l.r_gpu = cuda_make_array(0, batch*outputs);
|
||||||
l.z_gpu = cuda_make_array(0, batch*outputs);
|
l.z_gpu = cuda_make_array(0, batch*outputs);
|
||||||
l.h_gpu = cuda_make_array(0, batch*outputs);
|
l.h_gpu = cuda_make_array(0, batch*outputs);
|
||||||
|
|
||||||
|
#ifdef CUDNN
|
||||||
|
cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w);
|
||||||
|
cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w);
|
||||||
|
cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w);
|
||||||
|
cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w);
|
||||||
|
cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w);
|
||||||
|
cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return l;
|
return l;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user