From 176d65b76583803cf10194c4c70bdc51897f2ae3 Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Mon, 11 Aug 2014 12:52:07 -0700 Subject: [PATCH] Nist NIN testing multi-crop --- Makefile | 2 +- src/cnn.c | 66 ++++++++++++++----- src/convolutional_layer.c | 2 +- src/convolutional_layer_gpu.c | 0 src/crop_layer.c | 57 +++++++++++++++++ src/crop_layer.h | 22 +++++++ src/network.c | 60 +++++++++++++++++- src/network.h | 4 +- src/parser.c | 116 +++++++++++++++++++++++++--------- src/utils.c | 9 ++- src/utils.h | 1 + 11 files changed, 288 insertions(+), 51 deletions(-) delete mode 100644 src/convolutional_layer_gpu.c create mode 100644 src/crop_layer.c create mode 100644 src/crop_layer.h diff --git a/Makefile b/Makefile index 877fc7f0..cf0cfdf1 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ VPATH=./src/ EXEC=cnn OBJDIR=./obj/ -OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o +OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o crop_layer.o OBJS = $(addprefix $(OBJDIR), $(OBJ)) all: $(EXEC) diff --git a/src/cnn.c b/src/cnn.c index 41a78084..72ad4a14 100644 --- a/src/cnn.c +++ b/src/cnn.c @@ -240,9 +240,22 @@ void test_full() void test_cifar10() { - srand(222222); + + network net = parse_network_cfg("cfg/cifar10_part5.cfg"); + data test = load_cifar10_data("data/cifar10/test_batch.bin"); + clock_t start = clock(), end; + float test_acc = network_accuracy(net, test); + end = clock(); + printf("%f in %f Sec\n", test_acc, (float)(end-start)/CLOCKS_PER_SEC); + visualize_network(net); + cvWaitKey(0); +} + +void train_cifar10() +{ + srand(555555); network net = parse_network_cfg("cfg/cifar10.cfg"); - //data test = load_cifar10_data("data/cifar10/test_batch.bin"); + data test = load_cifar10_data("data/cifar10/test_batch.bin"); int count = 0; int iters = 10000/net.batch; data train = load_all_cifar10(); @@ -250,12 +263,20 @@ void test_cifar10() clock_t start = clock(), end; float loss = train_network_sgd(net, train, iters); end = clock(); - //visualize_network(net); - //cvWaitKey(1000); + visualize_network(net); + cvWaitKey(5000); //float test_acc = network_accuracy(net, test); //printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay); - printf("%d: Loss: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, (float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay); + if(count%10 == 0){ + float test_acc = network_accuracy(net, test); + printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay); + char buff[256]; + sprintf(buff, "/home/pjreddie/cifar/cifar2_%d.cfg", count); + save_network(net, buff); + }else{ + printf("%d: Loss: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, (float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay); + } } free_data(train); } @@ -292,13 +313,25 @@ void test_nist_single() void test_nist() { srand(222222); - network net = parse_network_cfg("cfg/nist.cfg"); + network net = parse_network_cfg("cfg/nist_final.cfg"); + data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10); + translate_data_rows(test, -144); + clock_t start = clock(), end; + float test_acc = network_accuracy_multi(net, test,16); + end = clock(); + printf("Accuracy: %f, Time: %lf seconds\n", test_acc,(float)(end-start)/CLOCKS_PER_SEC); +} + +void train_nist() +{ + srand(222222); + network net = parse_network_cfg("cfg/nist_final.cfg"); data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10); data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10); - translate_data_rows(train, -144); - //scale_data_rows(train, 1./128); - translate_data_rows(test, -144); - //scale_data_rows(test, 1./128); + translate_data_rows(train, -144); + //scale_data_rows(train, 1./128); + translate_data_rows(test, -144); + //scale_data_rows(test, 1./128); //randomize_data(train); int count = 0; //clock_t start = clock(), end; @@ -311,12 +344,12 @@ void test_nist() //float test_acc = 0; printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay); /*printf("%f %f %f %f %f\n", mean_array(get_network_output_layer(net,0), 100), - mean_array(get_network_output_layer(net,1), 100), - mean_array(get_network_output_layer(net,2), 100), - mean_array(get_network_output_layer(net,3), 100), - mean_array(get_network_output_layer(net,4), 100)); - */ - //save_network(net, "cfg/nist_basic_trained.cfg"); + mean_array(get_network_output_layer(net,1), 100), + mean_array(get_network_output_layer(net,2), 100), + mean_array(get_network_output_layer(net,3), 100), + mean_array(get_network_output_layer(net,4), 100)); + */ + save_network(net, "cfg/nist_final2.cfg"); //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay); //end = clock(); @@ -778,6 +811,7 @@ int main(int argc, char *argv[]) //test_nist_single(); test_nist(); //test_cifar10(); + //train_cifar10(); //test_vince(); //test_full(); //tune_VOC(); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index afa91d4f..2d4d7489 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -166,7 +166,7 @@ void learn_bias_convolutional_layer(convolutional_layer layer) *convolutional_out_width(layer); for(b = 0; b < layer.batch; ++b){ for(i = 0; i < layer.n; ++i){ - layer.bias_updates[i] += mean_array(layer.delta+size*(i+b*layer.n), size); + layer.bias_updates[i] += sum_array(layer.delta+size*(i+b*layer.n), size); } } } diff --git a/src/convolutional_layer_gpu.c b/src/convolutional_layer_gpu.c deleted file mode 100644 index e69de29b..00000000 diff --git a/src/crop_layer.c b/src/crop_layer.c new file mode 100644 index 00000000..58e1b55c --- /dev/null +++ b/src/crop_layer.c @@ -0,0 +1,57 @@ +#include "crop_layer.h" +#include + +image get_crop_image(crop_layer layer) +{ + int h = layer.crop_height; + int w = layer.crop_width; + int c = layer.c; + return float_to_image(h,w,c,layer.output); +} + +crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip) +{ + fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); + crop_layer *layer = calloc(1, sizeof(crop_layer)); + layer->batch = batch; + layer->h = h; + layer->w = w; + layer->c = c; + layer->flip = flip; + layer->crop_width = crop_width; + layer->crop_height = crop_height; + layer->output = calloc(crop_width*crop_height * c*batch, sizeof(float)); + layer->delta = calloc(crop_width*crop_height * c*batch, sizeof(float)); + return layer; +} +void forward_crop_layer(const crop_layer layer, float *input) +{ + int i,j,c,b; + int dh = rand()%(layer.h - layer.crop_height); + int dw = rand()%(layer.w - layer.crop_width); + int count = 0; + if(layer.flip && rand()%2){ + for(b = 0; b < layer.batch; ++b){ + for(c = 0; c < layer.c; ++c){ + for(i = dh; i < dh+layer.crop_height; ++i){ + for(j = dw+layer.crop_width-1; j >= dw; --j){ + int index = j+layer.w*(i+layer.h*(c + layer.c*b)); + layer.output[count++] = input[index]; + } + } + } + } + }else{ + for(b = 0; b < layer.batch; ++b){ + for(c = 0; c < layer.c; ++c){ + for(i = dh; i < dh+layer.crop_height; ++i){ + for(j = dw; j < dw+layer.crop_width; ++j){ + int index = j+layer.w*(i+layer.h*(c + layer.c*b)); + layer.output[count++] = input[index]; + } + } + } + } + } +} + diff --git a/src/crop_layer.h b/src/crop_layer.h new file mode 100644 index 00000000..a0cd9392 --- /dev/null +++ b/src/crop_layer.h @@ -0,0 +1,22 @@ +#ifndef CROP_LAYER_H +#define CROP_LAYER_H + +#include "image.h" + +typedef struct { + int batch; + int h,w,c; + int crop_width; + int crop_height; + int flip; + float *delta; + float *output; +} crop_layer; + +image get_crop_image(crop_layer layer); +crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip); +void forward_crop_layer(const crop_layer layer, float *input); +void backward_crop_layer(const crop_layer layer, float *input, float *delta); + +#endif + diff --git a/src/network.c b/src/network.c index ed927a8c..292bba0e 100644 --- a/src/network.c +++ b/src/network.c @@ -4,6 +4,7 @@ #include "data.h" #include "utils.h" +#include "crop_layer.h" #include "connected_layer.h" #include "convolutional_layer.h" #include "maxpool_layer.h" @@ -56,6 +57,11 @@ void forward_network(network net, float *input, int train) forward_softmax_layer(layer, input); input = layer.output; } + else if(net.types[i] == CROP){ + crop_layer layer = *(crop_layer *)net.layers[i]; + forward_crop_layer(layer, input); + input = layer.output; + } else if(net.types[i] == MAXPOOL){ maxpool_layer layer = *(maxpool_layer *)net.layers[i]; forward_maxpool_layer(layer, input); @@ -85,6 +91,11 @@ void forward_network(network net, float *input, int train) forward_connected_layer(layer, input); input = layer.output; } + else if(net.types[i] == CROP){ + crop_layer layer = *(crop_layer *)net.layers[i]; + forward_crop_layer(layer, input); + input = layer.output; + } else if(net.types[i] == SOFTMAX){ softmax_layer layer = *(softmax_layer *)net.layers[i]; forward_softmax_layer(layer, input); @@ -266,12 +277,14 @@ float train_network_sgd(network net, data d, int n) int i,j; float sum = 0; + int index = 0; for(i = 0; i < n; ++i){ for(j = 0; j < batch; ++j){ - int index = rand()%d.X.rows; + index = rand()%d.X.rows; memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); } + float err = train_network_datum(net, X, y); sum += err; //train_network_datum(net, X, y); @@ -300,6 +313,7 @@ float train_network_sgd(network net, data d, int n) //} } //printf("Accuracy: %f\n",(float) correct/n); + //show_image(float_to_image(32,32,3,X), "Orig"); free(X); free(y); return (float)sum/(n*batch); @@ -446,6 +460,10 @@ image get_network_image_layer(network net, int i) normalization_layer layer = *(normalization_layer *)net.layers[i]; return get_normalization_image(layer); } + else if(net.types[i] == CROP){ + crop_layer layer = *(crop_layer *)net.layers[i]; + return get_crop_image(layer); + } return make_empty_image(0,0,0); } @@ -464,6 +482,7 @@ void visualize_network(network net) image *prev = 0; int i; char buff[256]; + show_image(get_network_image_layer(net, 0), "Crop"); for(i = 0; i < net.n; ++i){ sprintf(buff, "Layer %d", i); if(net.types[i] == CONVOLUTIONAL){ @@ -484,6 +503,31 @@ float *network_predict(network net, float *input) return out; } +matrix network_predict_data_multi(network net, data test, int n) +{ + int i,j,b,m; + int k = get_network_output_size(net); + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net.batch*test.X.rows, sizeof(float)); + for(i = 0; i < test.X.rows; i += net.batch){ + for(b = 0; b < net.batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + for(m = 0; m < n; ++m){ + float *out = network_predict(net, X); + for(b = 0; b < net.batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] += out[j+b*k]/n; + } + } + } + } + free(X); + return pred; +} + matrix network_predict_data(network net, data test) { int i,j,b; @@ -525,6 +569,12 @@ void print_network(network net) image m = get_maxpool_image(layer); n = m.h*m.w*m.c; } + else if(net.types[i] == CROP){ + crop_layer layer = *(crop_layer *)net.layers[i]; + output = layer.output; + image m = get_crop_image(layer); + n = m.h*m.w*m.c; + } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; output = layer.output; @@ -553,4 +603,12 @@ float network_accuracy(network net, data d) return acc; } +float network_accuracy_multi(network net, data d, int n) +{ + matrix guess = network_predict_data_multi(net, d, n); + float acc = matrix_accuracy(d.y, guess); + free_matrix(guess); + return acc; +} + diff --git a/src/network.h b/src/network.h index a9a6797d..f8666e65 100644 --- a/src/network.h +++ b/src/network.h @@ -12,7 +12,8 @@ typedef enum { MAXPOOL, SOFTMAX, NORMALIZATION, - DROPOUT + DROPOUT, + CROP } LAYER_TYPE; typedef struct { @@ -41,6 +42,7 @@ float train_network_batch(network net, data d, int n); void train_network(network net, data d); matrix network_predict_data(network net, data test); float network_accuracy(network net, data d); +float network_accuracy_multi(network net, data d, int n); float *get_network_output(network net); float *get_network_output_layer(network net, int i); float *get_network_delta_layer(network net, int i); diff --git a/src/parser.c b/src/parser.c index 16563465..5c991a54 100644 --- a/src/parser.c +++ b/src/parser.c @@ -4,6 +4,7 @@ #include "parser.h" #include "activations.h" +#include "crop_layer.h" #include "convolutional_layer.h" #include "connected_layer.h" #include "maxpool_layer.h" @@ -24,6 +25,7 @@ int is_connected(section *s); int is_maxpool(section *s); int is_dropout(section *s); int is_softmax(section *s); +int is_crop(section *s); int is_normalization(section *s); list *read_cfg(char *filename); @@ -43,6 +45,22 @@ void free_section(section *s) free(s); } +void parse_data(char *data, float *a, int n) +{ + int i; + if(!data) return; + char *curr = data; + char *next = data; + int done = 0; + for(i = 0; i < n && !done; ++i){ + while(*++next !='\0' && *next != ','); + if(*next == '\0') done = 1; + *next = '\0'; + sscanf(curr, "%g", &a[i]); + curr = next+1; + } +} + convolutional_layer *parse_convolutional(list *options, network *net, int count) { int i; @@ -95,30 +113,8 @@ convolutional_layer *parse_convolutional(list *options, network *net, int count) } char *weights = option_find_str(options, "weights", 0); char *biases = option_find_str(options, "biases", 0); - if(biases){ - char *curr = biases; - char *next = biases; - int done = 0; - for(i = 0; i < n && !done; ++i){ - while(*++next !='\0' && *next != ','); - if(*next == '\0') done = 1; - *next = '\0'; - sscanf(curr, "%g", &layer->biases[i]); - curr = next+1; - } - } - if(weights){ - char *curr = weights; - char *next = weights; - int done = 0; - for(i = 0; i < c*n*size*size && !done; ++i){ - while(*++next !='\0' && *next != ','); - if(*next == '\0') done = 1; - *next = '\0'; - sscanf(curr, "%g", &layer->filters[i]); - curr = next+1; - } - } + parse_data(biases, layer->biases, n); + parse_data(weights, layer->filters, c*n*size*size); option_unused(options); return layer; } @@ -164,6 +160,10 @@ connected_layer *parse_connected(list *options, network *net, int count) curr = next+1; } } + char *weights = option_find_str(options, "weights", 0); + char *biases = option_find_str(options, "biases", 0); + parse_data(biases, layer->biases, output); + parse_data(weights, layer->weights, input*output); option_unused(options); return layer; } @@ -182,6 +182,36 @@ softmax_layer *parse_softmax(list *options, network *net, int count) return layer; } +crop_layer *parse_crop(list *options, network *net, int count) +{ + float learning_rate, momentum, decay; + int h,w,c; + int crop_height = option_find_int(options, "crop_height",1); + int crop_width = option_find_int(options, "crop_width",1); + int flip = option_find_int(options, "flip",0); + if(count == 0){ + h = option_find_int(options, "height",1); + w = option_find_int(options, "width",1); + c = option_find_int(options, "channels",1); + net->batch = option_find_int(options, "batch",1); + learning_rate = option_find_float(options, "learning_rate", .001); + momentum = option_find_float(options, "momentum", .9); + decay = option_find_float(options, "decay", .0001); + net->learning_rate = learning_rate; + net->momentum = momentum; + net->decay = decay; + }else{ + image m = get_network_image_layer(*net, count-1); + h = m.h; + w = m.w; + c = m.c; + if(h == 0) error("Layer before crop layer must output image."); + } + crop_layer *layer = make_crop_layer(net->batch,h,w,c,crop_height,crop_width,flip); + option_unused(options); + return layer; +} + maxpool_layer *parse_maxpool(list *options, network *net, int count) { int h,w,c; @@ -261,6 +291,10 @@ network parse_network_cfg(char *filename) connected_layer *layer = parse_connected(options, &net, count); net.types[count] = CONNECTED; net.layers[count] = layer; + }else if(is_crop(s)){ + crop_layer *layer = parse_crop(options, &net, count); + net.types[count] = CROP; + net.layers[count] = layer; }else if(is_softmax(s)){ softmax_layer *layer = parse_softmax(options, &net, count); net.types[count] = SOFTMAX; @@ -290,6 +324,10 @@ network parse_network_cfg(char *filename) return net; } +int is_crop(section *s) +{ + return (strcmp(s->type, "[crop]")==0); +} int is_convolutional(section *s) { return (strcmp(s->type, "[conv]")==0 @@ -389,11 +427,11 @@ void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay); } else { if(l->learning_rate != net.learning_rate) - fprintf(fp, "learning_rate=%g\n", l->learning_rate); + fprintf(fp, "learning_rate=%g\n", l->learning_rate); if(l->momentum != net.momentum) - fprintf(fp, "momentum=%g\n", l->momentum); + fprintf(fp, "momentum=%g\n", l->momentum); if(l->decay != net.decay) - fprintf(fp, "decay=%g\n", l->decay); + fprintf(fp, "decay=%g\n", l->decay); } fprintf(fp, "filters=%d\n" "size=%d\n" @@ -432,12 +470,30 @@ void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count) "activation=%s\n", l->outputs, get_activation_string(l->activation)); - fprintf(fp, "data="); + fprintf(fp, "biases="); for(i = 0; i < l->outputs; ++i) fprintf(fp, "%g,", l->biases[i]); - for(i = 0; i < l->inputs*l->outputs; ++i) fprintf(fp, "%g,", l->weights[i]); + fprintf(fp, "\n"); + fprintf(fp, "weights="); + for(i = 0; i < l->outputs*l->inputs; ++i) fprintf(fp, "%g,", l->weights[i]); fprintf(fp, "\n\n"); } +void print_crop_cfg(FILE *fp, crop_layer *l, network net, int count) +{ + fprintf(fp, "[crop]\n"); + if(count == 0) { + fprintf(fp, "batch=%d\n" + "height=%d\n" + "width=%d\n" + "channels=%d\n" + "learning_rate=%g\n" + "momentum=%g\n" + "decay=%g\n", + l->batch,l->h, l->w, l->c, net.learning_rate, net.momentum, net.decay); + } + fprintf(fp, "crop_height=%d\ncrop_width=%d\nflip=%d\n\n", l->crop_height, l->crop_width, l->flip); +} + void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count) { fprintf(fp, "[maxpool]\n"); @@ -481,6 +537,8 @@ void save_network(network net, char *filename) print_convolutional_cfg(fp, (convolutional_layer *)net.layers[i], net, i); else if(net.types[i] == CONNECTED) print_connected_cfg(fp, (connected_layer *)net.layers[i], net, i); + else if(net.types[i] == CROP) + print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i); else if(net.types[i] == MAXPOOL) print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i); else if(net.types[i] == NORMALIZATION) diff --git a/src/utils.c b/src/utils.c index 67a9ba11..8a65ba7b 100644 --- a/src/utils.c +++ b/src/utils.c @@ -143,12 +143,17 @@ float *parse_fields(char *line, int n) return field; } -float mean_array(float *a, int n) +float sum_array(float *a, int n) { int i; float sum = 0; for(i = 0; i < n; ++i) sum += a[i]; - return sum/n; + return sum; +} + +float mean_array(float *a, int n) +{ + return sum_array(a,n)/n; } float variance_array(float *a, int n) diff --git a/src/utils.h b/src/utils.h index 6fe0343a..f38af337 100644 --- a/src/utils.h +++ b/src/utils.h @@ -21,6 +21,7 @@ int max_index(float *a, int n); float constrain(float a, float max); float rand_normal(); float rand_uniform(); +float sum_array(float *a, int n); float mean_array(float *a, int n); float variance_array(float *a, int n); float **one_hot_encode(float *a, int n, int k);