From 153705226d8ca746478b69eeac9bc854766daa11 Mon Sep 17 00:00:00 2001
From: Joseph Redmon
Date: Tue, 27 Jan 2015 13:31:06 -0800
Subject: [PATCH] Bias updates bug fix

---
 Makefile                     |  5 ++---
 src/cnn.c                    | 33 ++++++++++++++++++---------------
 src/connected_layer.c        |  2 --
 src/convolutional_kernels.cu | 18 ++++++++++++------
 src/data.c                   |  3 ++-
 src/network.c                |  3 ---
 src/network_kernels.cu       |  9 +++++++++
 src/utils.c                  |  8 ++++++++
 src/utils.h                  |  1 +
 9 files changed, 52 insertions(+), 30 deletions(-)

diff --git a/Makefile b/Makefile
index e48e142c..cc0c9ad1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
 GPU=1
 DEBUG=0
+ARCH= -arch=sm_35
 
 VPATH=./src/
 EXEC=cnn
@@ -8,7 +9,6 @@ OBJDIR=./obj/
 CC=gcc
 NVCC=nvcc
 OPTS=-O3
-LINKER=$(CC)
 LDFLAGS=`pkg-config --libs opencv` -lm -pthread
 COMMON=`pkg-config --cflags opencv` -I/usr/local/cuda/include/
 CFLAGS=-Wall -Wfatal-errors
@@ -20,7 +20,6 @@ CFLAGS+=-O0 -g
 endif
 
 ifeq ($(GPU), 1)
-LINKER=$(NVCC)
 COMMON+=-DGPU
 CFLAGS+=-DGPU
 LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas
@@ -43,7 +42,7 @@ $(OBJDIR)%.o: %.c
 	$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
 
 $(OBJDIR)%.o: %.cu
-	$(NVCC) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
+	$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
 
 .PHONY: clean
diff --git a/src/cnn.c b/src/cnn.c
index c3b7b2c5..4f575dc4 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -212,7 +212,8 @@ void train_imagenet(char *cfgfile)
     //network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg");
     srand(time(0));
     network net = parse_network_cfg(cfgfile);
-    set_learning_network(&net, net.learning_rate, net.momentum, net.decay);
+    //test_learn_bias(*(convolutional_layer *)net.layers[1]);
+    //set_learning_network(&net, net.learning_rate, 0, net.decay);
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 3072;
     int i = net.seen/imgs;
@@ -383,25 +384,26 @@ void test_visualize(char *filename)
     cvWaitKey(0);
 }
 
-void test_cifar10()
+void test_cifar10(char *cfgfile)
 {
-    network net = parse_network_cfg("cfg/cifar10_part5.cfg");
+    network net = parse_network_cfg(cfgfile);
     data test = load_cifar10_data("data/cifar10/test_batch.bin");
     clock_t start = clock(), end;
-    float test_acc = network_accuracy(net, test);
+    float test_acc = network_accuracy_multi(net, test, 10);
     end = clock();
-    printf("%f in %f Sec\n", test_acc, (float)(end-start)/CLOCKS_PER_SEC);
-    visualize_network(net);
-    cvWaitKey(0);
+    printf("%f in %f Sec\n", test_acc, sec(end-start));
+    //visualize_network(net);
+    //cvWaitKey(0);
 }
 
-void train_cifar10()
+void train_cifar10(char *cfgfile)
 {
     srand(555555);
-    network net = parse_network_cfg("cfg/cifar10.cfg");
+    srand(time(0));
+    network net = parse_network_cfg(cfgfile);
     data test = load_cifar10_data("data/cifar10/test_batch.bin");
     int count = 0;
-    int iters = 10000/net.batch;
+    int iters = 50000/net.batch;
     data train = load_all_cifar10();
     while(++count <= 10000){
         clock_t time = clock();
@@ -410,9 +412,9 @@
         if(count%10 == 0){
             float test_acc = network_accuracy(net, test);
             printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds\n", count, loss, test_acc,sec(clock()-time));
-            //char buff[256];
-            //sprintf(buff, "unikitty/cifar10_%d.cfg", count);
-            //save_network(net, buff);
+            char buff[256];
+            sprintf(buff, "/home/pjreddie/imagenet_backup/cifar10_%d.cfg", count);
+            save_network(net, buff);
         }else{
             printf("%d: Loss: %f, Time: %lf seconds\n", count, loss, sec(clock()-time));
         }
@@ -709,8 +711,7 @@ int main(int argc, char **argv)
     }
 #endif
-    if(0==strcmp(argv[1], "cifar")) train_cifar10();
-    else if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
+    if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
     else if(0==strcmp(argv[1], "test_correct_nist")) test_correct_nist();
     else if(0==strcmp(argv[1], "test")) test_imagenet();
     //else if(0==strcmp(argv[1], "server")) run_server();
@@ -724,7 +725,9 @@ int main(int argc, char **argv)
         return 0;
     }
     else if(0==strcmp(argv[1], "detection")) train_detection_net(argv[2]);
+    else if(0==strcmp(argv[1], "ctrain")) train_cifar10(argv[2]);
     else if(0==strcmp(argv[1], "nist")) train_nist(argv[2]);
+    else if(0==strcmp(argv[1], "ctest")) test_cifar10(argv[2]);
     else if(0==strcmp(argv[1], "train")) train_imagenet(argv[2]);
     //else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]);
     else if(0==strcmp(argv[1], "detect")) test_detection(argv[2]);
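Note on the cnn.c changes above: the hard-wired "cifar" command becomes "ctrain"/"ctest" commands that take a cfg path on the command line, one training iteration now sweeps the full 50,000-image CIFAR-10 training set (iters = 50000/net.batch, i.e. 390 mini-batches at batch = 128), and a checkpoint is saved every 10 iterations. Evaluation switches from network_accuracy to network_accuracy_multi(net, test, 10), which is not defined in this patch; presumably it averages class scores over ten augmented forward passes before taking the argmax. A minimal CPU sketch of that idea, with network_predict_random() as a hypothetical stand-in for the augmented pass:

    #include <stdlib.h>
    #include <string.h>
    #include "network.h"
    #include "data.h"
    #include "utils.h"

    /* Sketch only: average class scores over n stochastic forward passes,
       then take the argmax. network_predict_random() is hypothetical;
       max_index() and get_network_output_size() are real helpers that
       appear elsewhere in this patch. */
    float accuracy_multi_sketch(network net, data d, int n)
    {
        int i, j, k;
        int classes = get_network_output_size(net);
        int correct = 0;
        float *avg = calloc(classes, sizeof(float));
        for(i = 0; i < d.X.rows; ++i){
            memset(avg, 0, classes*sizeof(float));
            for(j = 0; j < n; ++j){
                float *out = network_predict_random(net, d.X.vals[i]); /* hypothetical */
                for(k = 0; k < classes; ++k) avg[k] += out[k] / n;
            }
            if(max_index(avg, classes) == max_index(d.y.vals[i], classes)) ++correct;
        }
        free(avg);
        return (float)correct / d.X.rows;
    }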
diff --git a/src/connected_layer.c b/src/connected_layer.c
index 254d39e3..514dff03 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -78,8 +78,6 @@ void secret_update_connected_layer(connected_layer *layer)
     axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1);
     scal_cpu(layer->outputs, 0, layer->bias_updates, 1);
 
-    //printf("rate: %f\n", layer->learning_rate);
-
     axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1);
 
     axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1);
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 6461aff9..eaa41613 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -32,7 +32,7 @@ __global__ void learn_bias(int batch, int n, int size, float *delta, float *bias_updates)
 {
     __shared__ float part[BLOCK];
     int i,b;
-    int filter = (blockIdx.x + blockIdx.y*gridDim.x);
+    int filter = blockIdx.x;
     int p = threadIdx.x;
     float sum = 0;
     for(b = 0; b < batch; ++b){
@@ -52,8 +52,7 @@ extern "C" void learn_bias_convolutional_layer_ongpu(convolutional_layer layer)
 {
     int size = convolutional_out_height(layer)*convolutional_out_width(layer);
-
-    learn_bias<<<cuda_gridsize(layer.n), BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu);
+    learn_bias<<<layer.n, BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu);
     check_error(cudaPeekAtLastError());
 }
@@ -96,9 +95,6 @@ extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float *in)
         gemm_ongpu(0,0,m,n,k,1.,a,k,b,n,1.,c+i*m*n,n);
     }
     activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation);
-    cuda_pull_array(layer.output_gpu, layer.output, m*n*layer.batch);
-    //for(i = 0; i < m*n*layer.batch; ++i) printf("%f, ", layer.output[i]);
-    //printf("\n");
 }
 
 extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu)
@@ -153,6 +149,16 @@ extern "C" void push_convolutional_layer(convolutional_layer layer)
 extern "C" void update_convolutional_layer_gpu(convolutional_layer layer)
 {
     int size = layer.size*layer.size*layer.c*layer.n;
+
+/*
+    cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
+    cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
+    cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size);
+    cuda_pull_array(layer.filters_gpu, layer.filters, size);
+    printf("Bias: %f updates: %f\n", mse_array(layer.biases, layer.n), mse_array(layer.bias_updates, layer.n));
+    printf("Filter: %f updates: %f\n", mse_array(layer.filters, layer.n), mse_array(layer.filter_updates, layer.n));
+    */
+
     axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
     scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1);
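Note: the hunks above contain the fix the commit title refers to. Judging from the removed index computation (blockIdx.x + blockIdx.y*gridDim.x), the old code launched learn_bias on a 2D grid sized for layer.n threads (reconstructed above as cuda_gridsize(layer.n), i.e. ceil(layer.n/BLOCK) blocks) while the kernel treats each block as one filter, so with fewer than BLOCK filters only filter 0 ever accumulated a bias gradient. The fixed launch uses exactly layer.n blocks of BLOCK threads: block f reduces the deltas of filter f into shared memory. A plain-C reference of what the kernel computes, assuming delta is laid out batch-major as [batch][n filters][size spatial positions] (the convolutional layer's output layout), useful for checking the GPU path:

    /* CPU reference for the learn_bias kernel: bias_updates[f] accumulates
       the sum of delta over every image in the batch and every spatial
       position of filter f. Layout assumption: delta[i + size*(f + n*b)]. */
    void learn_bias_cpu(int batch, int n, int size, float *delta, float *bias_updates)
    {
        int f, b, i;
        for(f = 0; f < n; ++f){
            float sum = 0;
            for(b = 0; b < batch; ++b){
                for(i = 0; i < size; ++i){
                    sum += delta[i + size*(f + n*b)];
                }
            }
            bias_updates[f] += sum;
        }
    }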
diff --git a/src/data.c b/src/data.c
index 31aca3b9..87097b6c 100644
--- a/src/data.c
+++ b/src/data.c
@@ -239,7 +239,8 @@ void *load_in_thread(void *ptr)
 {
     struct load_args a = *(struct load_args*)ptr;
     *a.d = load_data(a.paths, a.n, a.m, a.labels, a.k, a.h, a.w);
-    normalize_data_rows(*a.d);
+    translate_data_rows(*a.d, -144);
+    scale_data_rows(*a.d, 1./128);
     free(ptr);
     return 0;
 }
diff --git a/src/network.c b/src/network.c
index eb39054a..f554090e 100644
--- a/src/network.c
+++ b/src/network.c
@@ -42,8 +42,6 @@ char *get_layer_string(LAYER_TYPE a)
     return "none";
 }
 
-
-
 network make_network(int n, int batch)
 {
     network net;
@@ -61,7 +59,6 @@ network make_network(int n, int batch)
     return net;
 }
 
-
 void forward_network(network net, float *input, float *truth, int train)
 {
     int i;
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index a0091740..7909e464 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -176,6 +176,7 @@ float * get_network_delta_gpu_layer(network net, int i)
 
 float train_network_datum_gpu(network net, float *x, float *y)
 {
+    //clock_t time = clock();
     int x_size = get_network_input_size(net)*net.batch;
     int y_size = get_network_output_size(net)*net.batch;
     if(!*net.input_gpu){
@@ -185,10 +186,18 @@ float train_network_datum_gpu(network net, float *x, float *y)
         cuda_push_array(*net.input_gpu, x, x_size);
         cuda_push_array(*net.truth_gpu, y, y_size);
     }
+    //printf("trans %f\n", sec(clock() - time));
+    //time = clock();
     forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1);
+    //printf("forw %f\n", sec(clock() - time));
+    //time = clock();
     backward_network_gpu(net, *net.input_gpu);
+    //printf("back %f\n", sec(clock() - time));
+    //time = clock();
     update_network_gpu(net);
     float error = get_network_cost(net);
+    //printf("updt %f\n", sec(clock() - time));
+    //time = clock();
     return error;
 }
diff --git a/src/utils.c b/src/utils.c
index a4071e21..96062b08 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -233,6 +233,14 @@ float constrain(float a, float max)
     return a;
 }
 
+float mse_array(float *a, int n)
+{
+    int i;
+    float sum = 0;
+    for(i = 0; i < n; ++i) sum += a[i]*a[i];
+    return sqrt(sum/n);
+}
+
 void normalize_array(float *a, int n)
 {
     int i;
diff --git a/src/utils.h b/src/utils.h
index ee26d35b..b1a0587b 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -22,6 +22,7 @@ void scale_array(float *a, int n, float s);
 void translate_array(float *a, int n, float s);
 int max_index(float *a, int n);
 float constrain(float a, float max);
+float mse_array(float *a, int n);
 float rand_normal();
 float rand_uniform();
 float sum_array(float *a, int n);
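Note: two of the smaller changes are easy to misread. The data.c loader switches from per-row normalization to a fixed affine map, x -> (x - 144)/128, so raw byte values 0, 144, and 255 land at -1.125, 0, and about +0.867. And the new mse_array() helper returns, despite its name, the root mean square sqrt(sum(a[i]*a[i])/n) rather than a mean squared error; within this patch it is used only by the commented-out debug printfs in update_convolutional_layer_gpu to compare the magnitude of weights against their updates. A self-contained check of both (compile with -lm); mse_array is copied verbatim from the patch:

    #include <stdio.h>
    #include <math.h>

    /* Same body as the new mse_array in src/utils.c: despite the name,
       this is the root mean square of the array, not a mean squared error. */
    float mse_array(float *a, int n)
    {
        int i;
        float sum = 0;
        for(i = 0; i < n; ++i) sum += a[i]*a[i];
        return sqrt(sum/n);
    }

    int main()
    {
        /* The data.c preprocessing: translate by -144, then scale by 1/128. */
        float pixels[3] = {0, 144, 255};
        int i;
        for(i = 0; i < 3; ++i){
            float x = (pixels[i] - 144)*(1./128);
            printf("%3.0f -> %+.3f\n", pixels[i], x); /* -1.125, +0.000, +0.867 */
        }
        printf("rms = %f\n", mse_array(pixels, 3));
        return 0;
    }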