mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Bias updates bug fix
This commit is contained in:
parent
809f924db2
commit
153705226d
5
Makefile
5
Makefile
@ -1,5 +1,6 @@
|
|||||||
GPU=1
|
GPU=1
|
||||||
DEBUG=0
|
DEBUG=0
|
||||||
|
ARCH= -arch=sm_35
|
||||||
|
|
||||||
VPATH=./src/
|
VPATH=./src/
|
||||||
EXEC=cnn
|
EXEC=cnn
|
||||||
@ -8,7 +9,6 @@ OBJDIR=./obj/
|
|||||||
CC=gcc
|
CC=gcc
|
||||||
NVCC=nvcc
|
NVCC=nvcc
|
||||||
OPTS=-O3
|
OPTS=-O3
|
||||||
LINKER=$(CC)
|
|
||||||
LDFLAGS=`pkg-config --libs opencv` -lm -pthread
|
LDFLAGS=`pkg-config --libs opencv` -lm -pthread
|
||||||
COMMON=`pkg-config --cflags opencv` -I/usr/local/cuda/include/
|
COMMON=`pkg-config --cflags opencv` -I/usr/local/cuda/include/
|
||||||
CFLAGS=-Wall -Wfatal-errors
|
CFLAGS=-Wall -Wfatal-errors
|
||||||
@ -20,7 +20,6 @@ CFLAGS+=-O0 -g
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(GPU), 1)
|
ifeq ($(GPU), 1)
|
||||||
LINKER=$(NVCC)
|
|
||||||
COMMON+=-DGPU
|
COMMON+=-DGPU
|
||||||
CFLAGS+=-DGPU
|
CFLAGS+=-DGPU
|
||||||
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas
|
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas
|
||||||
@ -43,7 +42,7 @@ $(OBJDIR)%.o: %.c
|
|||||||
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
|
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
$(OBJDIR)%.o: %.cu
|
$(OBJDIR)%.o: %.cu
|
||||||
$(NVCC) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
|
$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
|
|
||||||
|
33
src/cnn.c
33
src/cnn.c
@ -212,7 +212,8 @@ void train_imagenet(char *cfgfile)
|
|||||||
//network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg");
|
//network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg");
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
network net = parse_network_cfg(cfgfile);
|
network net = parse_network_cfg(cfgfile);
|
||||||
set_learning_network(&net, net.learning_rate, net.momentum, net.decay);
|
//test_learn_bias(*(convolutional_layer *)net.layers[1]);
|
||||||
|
//set_learning_network(&net, net.learning_rate, 0, net.decay);
|
||||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||||
int imgs = 3072;
|
int imgs = 3072;
|
||||||
int i = net.seen/imgs;
|
int i = net.seen/imgs;
|
||||||
@ -383,25 +384,26 @@ void test_visualize(char *filename)
|
|||||||
cvWaitKey(0);
|
cvWaitKey(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_cifar10()
|
void test_cifar10(char *cfgfile)
|
||||||
{
|
{
|
||||||
network net = parse_network_cfg("cfg/cifar10_part5.cfg");
|
network net = parse_network_cfg(cfgfile);
|
||||||
data test = load_cifar10_data("data/cifar10/test_batch.bin");
|
data test = load_cifar10_data("data/cifar10/test_batch.bin");
|
||||||
clock_t start = clock(), end;
|
clock_t start = clock(), end;
|
||||||
float test_acc = network_accuracy(net, test);
|
float test_acc = network_accuracy_multi(net, test, 10);
|
||||||
end = clock();
|
end = clock();
|
||||||
printf("%f in %f Sec\n", test_acc, (float)(end-start)/CLOCKS_PER_SEC);
|
printf("%f in %f Sec\n", test_acc, sec(end-start));
|
||||||
visualize_network(net);
|
//visualize_network(net);
|
||||||
cvWaitKey(0);
|
//cvWaitKey(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void train_cifar10()
|
void train_cifar10(char *cfgfile)
|
||||||
{
|
{
|
||||||
srand(555555);
|
srand(555555);
|
||||||
network net = parse_network_cfg("cfg/cifar10.cfg");
|
srand(time(0));
|
||||||
|
network net = parse_network_cfg(cfgfile);
|
||||||
data test = load_cifar10_data("data/cifar10/test_batch.bin");
|
data test = load_cifar10_data("data/cifar10/test_batch.bin");
|
||||||
int count = 0;
|
int count = 0;
|
||||||
int iters = 10000/net.batch;
|
int iters = 50000/net.batch;
|
||||||
data train = load_all_cifar10();
|
data train = load_all_cifar10();
|
||||||
while(++count <= 10000){
|
while(++count <= 10000){
|
||||||
clock_t time = clock();
|
clock_t time = clock();
|
||||||
@ -410,9 +412,9 @@ void train_cifar10()
|
|||||||
if(count%10 == 0){
|
if(count%10 == 0){
|
||||||
float test_acc = network_accuracy(net, test);
|
float test_acc = network_accuracy(net, test);
|
||||||
printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds\n", count, loss, test_acc,sec(clock()-time));
|
printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds\n", count, loss, test_acc,sec(clock()-time));
|
||||||
//char buff[256];
|
char buff[256];
|
||||||
//sprintf(buff, "unikitty/cifar10_%d.cfg", count);
|
sprintf(buff, "/home/pjreddie/imagenet_backup/cifar10_%d.cfg", count);
|
||||||
//save_network(net, buff);
|
save_network(net, buff);
|
||||||
}else{
|
}else{
|
||||||
printf("%d: Loss: %f, Time: %lf seconds\n", count, loss, sec(clock()-time));
|
printf("%d: Loss: %f, Time: %lf seconds\n", count, loss, sec(clock()-time));
|
||||||
}
|
}
|
||||||
@ -709,8 +711,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(0==strcmp(argv[1], "cifar")) train_cifar10();
|
if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
|
||||||
else if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
|
|
||||||
else if(0==strcmp(argv[1], "test_correct_nist")) test_correct_nist();
|
else if(0==strcmp(argv[1], "test_correct_nist")) test_correct_nist();
|
||||||
else if(0==strcmp(argv[1], "test")) test_imagenet();
|
else if(0==strcmp(argv[1], "test")) test_imagenet();
|
||||||
//else if(0==strcmp(argv[1], "server")) run_server();
|
//else if(0==strcmp(argv[1], "server")) run_server();
|
||||||
@ -724,7 +725,9 @@ int main(int argc, char **argv)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else if(0==strcmp(argv[1], "detection")) train_detection_net(argv[2]);
|
else if(0==strcmp(argv[1], "detection")) train_detection_net(argv[2]);
|
||||||
|
else if(0==strcmp(argv[1], "ctrain")) train_cifar10(argv[2]);
|
||||||
else if(0==strcmp(argv[1], "nist")) train_nist(argv[2]);
|
else if(0==strcmp(argv[1], "nist")) train_nist(argv[2]);
|
||||||
|
else if(0==strcmp(argv[1], "ctest")) test_cifar10(argv[2]);
|
||||||
else if(0==strcmp(argv[1], "train")) train_imagenet(argv[2]);
|
else if(0==strcmp(argv[1], "train")) train_imagenet(argv[2]);
|
||||||
//else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]);
|
//else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]);
|
||||||
else if(0==strcmp(argv[1], "detect")) test_detection(argv[2]);
|
else if(0==strcmp(argv[1], "detect")) test_detection(argv[2]);
|
||||||
|
@ -78,8 +78,6 @@ void secret_update_connected_layer(connected_layer *layer)
|
|||||||
axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1);
|
axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1);
|
||||||
scal_cpu(layer->outputs, 0, layer->bias_updates, 1);
|
scal_cpu(layer->outputs, 0, layer->bias_updates, 1);
|
||||||
|
|
||||||
//printf("rate: %f\n", layer->learning_rate);
|
|
||||||
|
|
||||||
axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1);
|
axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1);
|
||||||
|
|
||||||
axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1);
|
axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1);
|
||||||
|
@ -32,7 +32,7 @@ __global__ void learn_bias(int batch, int n, int size, float *delta, float *bias
|
|||||||
{
|
{
|
||||||
__shared__ float part[BLOCK];
|
__shared__ float part[BLOCK];
|
||||||
int i,b;
|
int i,b;
|
||||||
int filter = (blockIdx.x + blockIdx.y*gridDim.x);
|
int filter = blockIdx.x;
|
||||||
int p = threadIdx.x;
|
int p = threadIdx.x;
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
for(b = 0; b < batch; ++b){
|
for(b = 0; b < batch; ++b){
|
||||||
@ -52,8 +52,7 @@ extern "C" void learn_bias_convolutional_layer_ongpu(convolutional_layer layer)
|
|||||||
{
|
{
|
||||||
int size = convolutional_out_height(layer)*convolutional_out_width(layer);
|
int size = convolutional_out_height(layer)*convolutional_out_width(layer);
|
||||||
|
|
||||||
|
learn_bias<<<layer.n, BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu);
|
||||||
learn_bias<<<cuda_gridsize(layer.n), BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu);
|
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,9 +95,6 @@ extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float
|
|||||||
gemm_ongpu(0,0,m,n,k,1.,a,k,b,n,1.,c+i*m*n,n);
|
gemm_ongpu(0,0,m,n,k,1.,a,k,b,n,1.,c+i*m*n,n);
|
||||||
}
|
}
|
||||||
activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation);
|
activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation);
|
||||||
cuda_pull_array(layer.output_gpu, layer.output, m*n*layer.batch);
|
|
||||||
//for(i = 0; i < m*n*layer.batch; ++i) printf("%f, ", layer.output[i]);
|
|
||||||
//printf("\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu)
|
extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu)
|
||||||
@ -153,6 +149,16 @@ extern "C" void push_convolutional_layer(convolutional_layer layer)
|
|||||||
extern "C" void update_convolutional_layer_gpu(convolutional_layer layer)
|
extern "C" void update_convolutional_layer_gpu(convolutional_layer layer)
|
||||||
{
|
{
|
||||||
int size = layer.size*layer.size*layer.c*layer.n;
|
int size = layer.size*layer.size*layer.c*layer.n;
|
||||||
|
|
||||||
|
/*
|
||||||
|
cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
||||||
|
cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
|
||||||
|
cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size);
|
||||||
|
cuda_pull_array(layer.filters_gpu, layer.filters, size);
|
||||||
|
printf("Bias: %f updates: %f\n", mse_array(layer.biases, layer.n), mse_array(layer.bias_updates, layer.n));
|
||||||
|
printf("Filter: %f updates: %f\n", mse_array(layer.filters, layer.n), mse_array(layer.filter_updates, layer.n));
|
||||||
|
*/
|
||||||
|
|
||||||
axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
||||||
scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1);
|
scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1);
|
||||||
|
|
||||||
|
@ -239,7 +239,8 @@ void *load_in_thread(void *ptr)
|
|||||||
{
|
{
|
||||||
struct load_args a = *(struct load_args*)ptr;
|
struct load_args a = *(struct load_args*)ptr;
|
||||||
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.k, a.h, a.w);
|
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.k, a.h, a.w);
|
||||||
normalize_data_rows(*a.d);
|
translate_data_rows(*a.d, -144);
|
||||||
|
scale_data_rows(*a.d, 1./128);
|
||||||
free(ptr);
|
free(ptr);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -42,8 +42,6 @@ char *get_layer_string(LAYER_TYPE a)
|
|||||||
return "none";
|
return "none";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
network make_network(int n, int batch)
|
network make_network(int n, int batch)
|
||||||
{
|
{
|
||||||
network net;
|
network net;
|
||||||
@ -61,7 +59,6 @@ network make_network(int n, int batch)
|
|||||||
return net;
|
return net;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void forward_network(network net, float *input, float *truth, int train)
|
void forward_network(network net, float *input, float *truth, int train)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -176,6 +176,7 @@ float * get_network_delta_gpu_layer(network net, int i)
|
|||||||
|
|
||||||
float train_network_datum_gpu(network net, float *x, float *y)
|
float train_network_datum_gpu(network net, float *x, float *y)
|
||||||
{
|
{
|
||||||
|
//clock_t time = clock();
|
||||||
int x_size = get_network_input_size(net)*net.batch;
|
int x_size = get_network_input_size(net)*net.batch;
|
||||||
int y_size = get_network_output_size(net)*net.batch;
|
int y_size = get_network_output_size(net)*net.batch;
|
||||||
if(!*net.input_gpu){
|
if(!*net.input_gpu){
|
||||||
@ -185,10 +186,18 @@ float train_network_datum_gpu(network net, float *x, float *y)
|
|||||||
cuda_push_array(*net.input_gpu, x, x_size);
|
cuda_push_array(*net.input_gpu, x, x_size);
|
||||||
cuda_push_array(*net.truth_gpu, y, y_size);
|
cuda_push_array(*net.truth_gpu, y, y_size);
|
||||||
}
|
}
|
||||||
|
//printf("trans %f\n", sec(clock() - time));
|
||||||
|
//time = clock();
|
||||||
forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1);
|
forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1);
|
||||||
|
//printf("forw %f\n", sec(clock() - time));
|
||||||
|
//time = clock();
|
||||||
backward_network_gpu(net, *net.input_gpu);
|
backward_network_gpu(net, *net.input_gpu);
|
||||||
|
//printf("back %f\n", sec(clock() - time));
|
||||||
|
//time = clock();
|
||||||
update_network_gpu(net);
|
update_network_gpu(net);
|
||||||
float error = get_network_cost(net);
|
float error = get_network_cost(net);
|
||||||
|
//printf("updt %f\n", sec(clock() - time));
|
||||||
|
//time = clock();
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,6 +233,14 @@ float constrain(float a, float max)
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float mse_array(float *a, int n)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
float sum = 0;
|
||||||
|
for(i = 0; i < n; ++i) sum += a[i]*a[i];
|
||||||
|
return sqrt(sum/n);
|
||||||
|
}
|
||||||
|
|
||||||
void normalize_array(float *a, int n)
|
void normalize_array(float *a, int n)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -22,6 +22,7 @@ void scale_array(float *a, int n, float s);
|
|||||||
void translate_array(float *a, int n, float s);
|
void translate_array(float *a, int n, float s);
|
||||||
int max_index(float *a, int n);
|
int max_index(float *a, int n);
|
||||||
float constrain(float a, float max);
|
float constrain(float a, float max);
|
||||||
|
float mse_array(float *a, int n);
|
||||||
float rand_normal();
|
float rand_normal();
|
||||||
float rand_uniform();
|
float rand_uniform();
|
||||||
float sum_array(float *a, int n);
|
float sum_array(float *a, int n);
|
||||||
|
Loading…
Reference in New Issue
Block a user