From 7756cccb793bb4950c241f2804195ea859d1b407 Mon Sep 17 00:00:00 2001
From: Joseph Redmon
Date: Mon, 13 Oct 2014 22:31:48 -0700
Subject: [PATCH] Refactored connected to use blas

---
 Makefile              |  3 ++-
 src/cnn.c             |  4 ++--
 src/connected_layer.c | 23 ++++++++---------------
 src/connected_layer.h |  3 ---
 src/network.c         | 12 ++++++++++++
 5 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/Makefile b/Makefile
index b5ad1eb0..c4abedd0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC=gcc
-GPU=1
+GPU=0
 COMMON=-Wall -Wfatal-errors `pkg-config --cflags opencv` -I/usr/local/cuda/include/
 ifeq ($(GPU), 1)
 COMMON+=-DGPU
@@ -7,6 +7,7 @@ else
 endif
 UNAME = $(shell uname)
 OPTS=-Ofast -flto
+OPTS=-Ofast -flto
 ifeq ($(UNAME), Darwin)
 COMMON+= -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include
 ifeq ($(GPU), 1)
diff --git a/src/cnn.c b/src/cnn.c
index 472aa03b..df3efa6d 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -916,8 +916,8 @@ int main(int argc, char *argv[])
     //test_ensemble();
     //test_nist_single();
     //test_nist();
-    //train_nist();
-    test_convolutional_layer();
+    train_nist();
+    //test_convolutional_layer();
     //test_col2im();
     //test_cifar10();
     //train_cifar10();
diff --git a/src/connected_layer.c b/src/connected_layer.c
index 95db5d59..03590d67 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -26,7 +26,6 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVA
 
     layer->weight_updates = calloc(inputs*outputs, sizeof(float));
     //layer->weight_adapt = calloc(inputs*outputs, sizeof(float));
-    layer->weight_momentum = calloc(inputs*outputs, sizeof(float));
     layer->weights = calloc(inputs*outputs, sizeof(float));
     float scale = 1./inputs;
     scale = .05;
@@ -35,7 +34,6 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVA
 
     layer->bias_updates = calloc(outputs, sizeof(float));
     //layer->bias_adapt = calloc(outputs, sizeof(float));
-    layer->bias_momentum = calloc(outputs, sizeof(float));
     layer->biases = calloc(outputs, sizeof(float));
     for(i = 0; i < outputs; ++i){
         //layer->biases[i] = rand_normal()*scale + scale;
@@ -50,24 +48,19 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVA
 
 void update_connected_layer(connected_layer layer)
 {
-    int i;
-    for(i = 0; i < layer.outputs; ++i){
-        layer.bias_momentum[i] = layer.learning_rate*(layer.bias_updates[i]) + layer.momentum*layer.bias_momentum[i];
-        layer.biases[i] += layer.bias_momentum[i];
-    }
-    for(i = 0; i < layer.outputs*layer.inputs; ++i){
-        layer.weight_momentum[i] = layer.learning_rate*(layer.weight_updates[i] - layer.decay*layer.weights[i]) + layer.momentum*layer.weight_momentum[i];
-        layer.weights[i] += layer.weight_momentum[i];
-    }
-    memset(layer.bias_updates, 0, layer.outputs*sizeof(float));
-    memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(float));
+    axpy_cpu(layer.outputs, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1);
+    scal_cpu(layer.outputs, layer.momentum, layer.bias_updates, 1);
+
+    scal_cpu(layer.inputs*layer.outputs, 1.-layer.learning_rate*layer.decay, layer.weights, 1);
+    axpy_cpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates, 1, layer.weights, 1);
+    scal_cpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates, 1);
 }
 
 void forward_connected_layer(connected_layer layer, float *input)
 {
     int i;
     for(i = 0; i < layer.batch; ++i){
-        memcpy(layer.output+i*layer.outputs, layer.biases, layer.outputs*sizeof(float));
+        copy_cpu(layer.outputs, layer.biases, 1, layer.output + i*layer.outputs, 1);
     }
     int m = layer.batch;
     int k = layer.inputs;
@@ -82,8 +75,8 @@ void forward_connected_layer(connected_layer layer, float *input)
 void backward_connected_layer(connected_layer layer, float *input, float *delta)
 {
     int i;
+    gradient_array(layer.output, layer.outputs*layer.batch, layer.activation, layer.delta);
     for(i = 0; i < layer.outputs*layer.batch; ++i){
-        layer.delta[i] *= gradient(layer.output[i], layer.activation);
         layer.bias_updates[i%layer.outputs] += layer.delta[i];
     }
     int m = layer.inputs;
diff --git a/src/connected_layer.h b/src/connected_layer.h
index 43226594..9181fe22 100644
--- a/src/connected_layer.h
+++ b/src/connected_layer.h
@@ -21,9 +21,6 @@ typedef struct{
     float *weight_adapt;
     float *bias_adapt;
 
-    float *weight_momentum;
-    float *bias_momentum;
-
     float *output;
     float *delta;
 
diff --git a/src/network.c b/src/network.c
index 58331667..e4e4c8e1 100644
--- a/src/network.c
+++ b/src/network.c
@@ -229,6 +229,8 @@ float *get_network_output_layer(network net, int i)
         return layer.output;
     } else if(net.types[i] == DROPOUT){
         return get_network_output_layer(net, i-1);
+    } else if(net.types[i] == FREEWEIGHT){
+        return get_network_output_layer(net, i-1);
     } else if(net.types[i] == CONNECTED){
         connected_layer layer = *(connected_layer *)net.layers[i];
         return layer.output;
@@ -258,6 +260,8 @@ float *get_network_delta_layer(network net, int i)
         return layer.delta;
     } else if(net.types[i] == DROPOUT){
         return get_network_delta_layer(net, i-1);
+    } else if(net.types[i] == FREEWEIGHT){
+        return get_network_delta_layer(net, i-1);
     } else if(net.types[i] == CONNECTED){
         connected_layer layer = *(connected_layer *)net.layers[i];
         return layer.delta;
@@ -424,6 +428,10 @@ int get_network_input_size_layer(network net, int i)
         dropout_layer layer = *(dropout_layer *) net.layers[i];
         return layer.inputs;
     }
+    else if(net.types[i] == FREEWEIGHT){
+        freeweight_layer layer = *(freeweight_layer *) net.layers[i];
+        return layer.inputs;
+    }
     else if(net.types[i] == SOFTMAX){
         softmax_layer layer = *(softmax_layer *)net.layers[i];
         return layer.inputs;
@@ -451,6 +459,10 @@ int get_network_output_size_layer(network net, int i)
         dropout_layer layer = *(dropout_layer *) net.layers[i];
         return layer.inputs;
     }
+    else if(net.types[i] == FREEWEIGHT){
+        freeweight_layer layer = *(freeweight_layer *) net.layers[i];
+        return layer.inputs;
+    }
     else if(net.types[i] == SOFTMAX){
         softmax_layer layer = *(softmax_layer *)net.layers[i];
         return layer.inputs;
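
Note on the BLAS-style helpers: update_connected_layer and forward_connected_layer now call
axpy_cpu, scal_cpu, and copy_cpu instead of open-coded loops, memcpy, and memset. Those
routines are not defined in this patch; the following is only a minimal sketch of level-1
BLAS-like implementations consistent with the call sites above (names, argument order, and
the unit increments are taken from those calls, not from this patch):

    /* Y = ALPHA*X + Y, with strides INCX/INCY (SAXPY-style) */
    void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
    {
        int i;
        for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX];
    }

    /* X = ALPHA*X (SSCAL-style) */
    void scal_cpu(int N, float ALPHA, float *X, int INCX)
    {
        int i;
        for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;
    }

    /* Y = X (SCOPY-style) */
    void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
    {
        int i;
        for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
    }

With copy_cpu the bias broadcast in forward_connected_layer behaves the same as the old
memcpy, but the routine can later be swapped for a GPU kernel or a vendor BLAS call.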
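Note on the new update rule: the scal_cpu/axpy_cpu calls in update_connected_layer perform
the same kind of SGD step with momentum and weight decay as the deleted loops, with two
differences visible in the diff: the running momentum now lives in bias_updates and
weight_updates themselves (they are scaled by layer.momentum each step instead of being
memset to zero), which is why the separate *_momentum buffers are removed from
connected_layer.h, and the decay term is applied directly to the weights rather than inside
the momentum accumulator. Written out per element, the weight update above amounts to the
following (illustration only, not code from the patch):

    /* What the weight-side scal/axpy/scal sequence computes for one element. */
    void update_one_weight(float *weight, float *weight_update,
                           float learning_rate, float momentum, float decay)
    {
        *weight        *= 1.f - learning_rate*decay;       /* weight decay on the weight   */
        *weight        += learning_rate*(*weight_update);  /* gradient step (with momentum */
                                                           /* already folded into update)  */
        *weight_update *= momentum;                        /* carry momentum to next batch */
    }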
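Note on the activation gradient: backward_connected_layer now scales layer.delta by the
activation derivative with a single gradient_array call over the whole batch instead of
calling gradient() once per element inside the loop. gradient_array is also not part of
this patch; a sketch consistent with the removed per-element code would be:

    /* delta[i] *= activation'(x[i]) for all n elements; sketch only, assuming the
     * existing per-element gradient() helper and ACTIVATION enum used above. */
    void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
    {
        int i;
        for(i = 0; i < n; ++i){
            delta[i] *= gradient(x[i], a);
        }
    }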