From 0d6bb5d44d8e815ebf6ccce1dae2f83178780e7b Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Mon, 2 Dec 2013 16:41:40 -0800 Subject: [PATCH] Working? --- Makefile | 7 +- random_filter_finish.cfg => connected.cfg | 4 +- convolutional.cfg | 9 ++ full.cfg | 21 +++ nist.cfg | 35 ++++ nist_basic.cfg | 10 ++ src/activations.c | 30 +++- src/activations.h | 11 +- src/connected_layer.c | 27 ++-- src/connected_layer.h | 6 +- src/convolutional_layer.c | 165 ++++++++++++++----- src/convolutional_layer.h | 12 +- src/data.c | 31 ++-- src/data.h | 6 +- src/image.c | 148 ++++++++++++++--- src/image.h | 17 +- src/maxpool_layer.c | 30 ++++ src/maxpool_layer.h | 1 + src/network.c | 100 ++++++++++-- src/network.h | 5 +- src/parser.c | 19 +++ src/softmax_layer.c | 35 ++++ src/softmax_layer.h | 14 ++ src/tests.c | 187 +++++++++++++++++++--- src/utils.c | 42 +++++ src/utils.h | 5 + test.cfg | 36 ++--- test_parser.cfg | 8 - test_random_filter.cfg | 29 ---- 29 files changed, 836 insertions(+), 214 deletions(-) rename random_filter_finish.cfg => connected.cfg (69%) create mode 100644 convolutional.cfg create mode 100644 full.cfg create mode 100644 nist.cfg create mode 100644 nist_basic.cfg create mode 100644 src/softmax_layer.c create mode 100644 src/softmax_layer.h delete mode 100644 test_parser.cfg delete mode 100644 test_random_filter.cfg diff --git a/Makefile b/Makefile index 3140af5c..6cd3999a 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,11 @@ CC=gcc -CFLAGS=-Wall `pkg-config --cflags opencv` -O3 -ffast-math -flto -march=native -#CFLAGS=-Wall `pkg-config --cflags opencv` -O0 -g +COMMON=-Wall `pkg-config --cflags opencv` -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include +CFLAGS= $(COMMON) -O3 -ffast-math -flto +#CFLAGS= $(COMMON) -O0 -g LDFLAGS=`pkg-config --libs opencv` -lm VPATH=./src/ -OBJ=network.o image.o tests.o convolutional_layer.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o +OBJ=network.o image.o tests.o convolutional_layer.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o all: cnn diff --git a/random_filter_finish.cfg b/connected.cfg similarity index 69% rename from random_filter_finish.cfg rename to connected.cfg index 68ddda15..dc2c073c 100644 --- a/random_filter_finish.cfg +++ b/connected.cfg @@ -1,6 +1,6 @@ [conn] -input = 1690 -output = 20 +input=1690 +output = 10 activation=relu [conn] diff --git a/convolutional.cfg b/convolutional.cfg new file mode 100644 index 00000000..1612c9cb --- /dev/null +++ b/convolutional.cfg @@ -0,0 +1,9 @@ +[conv] +width=200 +height=200 +channels=3 +filters=10 +size=15 +stride=16 +activation=relu + diff --git a/full.cfg b/full.cfg new file mode 100644 index 00000000..a18da176 --- /dev/null +++ b/full.cfg @@ -0,0 +1,21 @@ +[conv] +width=64 +height=64 +channels=3 +filters=10 +size=11 +stride=2 +activation=ramp + +[maxpool] +stride=2 + +[conn] +output = 100 +activation=ramp + +[conn] +output = 2 +activation=ramp + +[softmax] diff --git a/nist.cfg b/nist.cfg new file mode 100644 index 00000000..cc9282c0 --- /dev/null +++ b/nist.cfg @@ -0,0 +1,35 @@ +[conv] +width=28 +height=28 +channels=1 +filters=4 +size=5 +stride=1 +activation=ramp + +[maxpool] +stride=2 + +[conv] +filters=12 +size=5 +stride=1 +activation=ramp + +[maxpool] +stride=2 + +[conv] +filters=10 +size=3 +stride=1 +activation=ramp + +[maxpool] +stride=2 + +[conn] +output = 10 +activation=ramp + +[softmax] diff --git a/nist_basic.cfg b/nist_basic.cfg new file mode 100644 index 00000000..3b55166b --- /dev/null +++ b/nist_basic.cfg @@ -0,0 +1,10 @@ +[conn] +input=784 +output = 100 +activation=ramp + +[conn] +output = 10 +activation=ramp + +[softmax] diff --git a/src/activations.c b/src/activations.c index a1280296..a255f0fd 100644 --- a/src/activations.c +++ b/src/activations.c @@ -9,10 +9,38 @@ ACTIVATION get_activation(char *s) if (strcmp(s, "sigmoid")==0) return SIGMOID; if (strcmp(s, "relu")==0) return RELU; if (strcmp(s, "identity")==0) return IDENTITY; + if (strcmp(s, "ramp")==0) return RAMP; fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); return RELU; } +double activate(double x, ACTIVATION a){ + switch(a){ + case IDENTITY: + return x; + case SIGMOID: + return 1./(1.+exp(-x)); + case RELU: + return x*(x>0); + case RAMP: + return x*(x>0) + .1*x; + } + return 0; +} +double gradient(double x, ACTIVATION a){ + switch(a){ + case IDENTITY: + return 1; + case SIGMOID: + return (1.-x)*x; + case RELU: + return (x>0); + case RAMP: + return (x>0) + .1; + } + return 0; +} + double identity_activation(double x) { return x; @@ -28,7 +56,7 @@ double relu_activation(double x) } double relu_gradient(double x) { - return (x>=0); + return (x>0); } double sigmoid_activation(double x) diff --git a/src/activations.h b/src/activations.h index 09584cc5..15d96d3d 100644 --- a/src/activations.h +++ b/src/activations.h @@ -2,16 +2,13 @@ #define ACTIVATIONS_H typedef enum{ - SIGMOID, RELU, IDENTITY + SIGMOID, RELU, IDENTITY, RAMP }ACTIVATION; ACTIVATION get_activation(char *s); -double relu_activation(double x); -double relu_gradient(double x); -double sigmoid_activation(double x); -double sigmoid_gradient(double x); -double identity_activation(double x); -double identity_gradient(double x); + +double activate(double x, ACTIVATION a); +double gradient(double x, ACTIVATION a); #endif diff --git a/src/connected_layer.c b/src/connected_layer.c index d77a10ce..99f146b5 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -1,11 +1,12 @@ #include "connected_layer.h" +#include "utils.h" #include #include #include #include -connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activator) +connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activation) { printf("Connected Layer: %d inputs, %d outputs\n", inputs, outputs); int i; @@ -19,26 +20,18 @@ connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activa layer->weight_updates = calloc(inputs*outputs, sizeof(double)); layer->weight_momentum = calloc(inputs*outputs, sizeof(double)); layer->weights = calloc(inputs*outputs, sizeof(double)); + double scale = 2./inputs; for(i = 0; i < inputs*outputs; ++i) - layer->weights[i] = .01*(.5 - (double)rand()/RAND_MAX); + layer->weights[i] = rand_normal()*scale; layer->bias_updates = calloc(outputs, sizeof(double)); layer->bias_momentum = calloc(outputs, sizeof(double)); layer->biases = calloc(outputs, sizeof(double)); for(i = 0; i < outputs; ++i) + //layer->biases[i] = rand_normal()*scale + scale; layer->biases[i] = 1; - if(activator == SIGMOID){ - layer->activation = sigmoid_activation; - layer->gradient = sigmoid_gradient; - }else if(activator == RELU){ - layer->activation = relu_activation; - layer->gradient = relu_gradient; - }else if(activator == IDENTITY){ - layer->activation = identity_activation; - layer->gradient = identity_gradient; - } - + layer->activation = activation; return layer; } @@ -50,7 +43,7 @@ void forward_connected_layer(connected_layer layer, double *input) for(j = 0; j < layer.inputs; ++j){ layer.output[i] += input[j]*layer.weights[i*layer.inputs + j]; } - layer.output[i] = layer.activation(layer.output[i]); + layer.output[i] = activate(layer.output[i], layer.activation); } } @@ -58,6 +51,7 @@ void learn_connected_layer(connected_layer layer, double *input) { int i, j; for(i = 0; i < layer.outputs; ++i){ + layer.delta[i] *= gradient(layer.output[i], layer.activation); layer.bias_updates[i] += layer.delta[i]; for(j = 0; j < layer.inputs; ++j){ layer.weight_updates[i*layer.inputs + j] += layer.delta[i]*input[j]; @@ -69,12 +63,13 @@ void update_connected_layer(connected_layer layer, double step, double momentum, { int i,j; for(i = 0; i < layer.outputs; ++i){ - layer.bias_momentum[i] = step*(layer.bias_updates[i] - decay*layer.biases[i]) + momentum*layer.bias_momentum[i]; + layer.bias_momentum[i] = step*(layer.bias_updates[i]) + momentum*layer.bias_momentum[i]; layer.biases[i] += layer.bias_momentum[i]; for(j = 0; j < layer.inputs; ++j){ int index = i*layer.inputs+j; layer.weight_momentum[index] = step*(layer.weight_updates[index] - decay*layer.weights[index]) + momentum*layer.weight_momentum[index]; layer.weights[index] += layer.weight_momentum[index]; + //layer.weights[index] = constrain(layer.weights[index], 100.); } } memset(layer.bias_updates, 0, layer.outputs*sizeof(double)); @@ -86,12 +81,10 @@ void backward_connected_layer(connected_layer layer, double *input, double *delt int i, j; for(j = 0; j < layer.inputs; ++j){ - double grad = layer.gradient(input[j]); delta[j] = 0; for(i = 0; i < layer.outputs; ++i){ delta[j] += layer.delta[i]*layer.weights[i*layer.inputs + j]; } - delta[j] *= grad; } } diff --git a/src/connected_layer.h b/src/connected_layer.h index 86815cb3..05fb2616 100644 --- a/src/connected_layer.h +++ b/src/connected_layer.h @@ -18,11 +18,11 @@ typedef struct{ double *output; double *delta; - double (* activation)(); - double (* gradient)(); + ACTIVATION activation; + } connected_layer; -connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activator); +connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activation); void forward_connected_layer(connected_layer layer, double *input); void backward_connected_layer(connected_layer layer, double *input, double *delta); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index d4aff73e..6d77700b 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -1,55 +1,74 @@ #include "convolutional_layer.h" +#include "utils.h" #include image get_convolutional_image(convolutional_layer layer) { - int h = (layer.h-1)/layer.stride + 1; - int w = (layer.w-1)/layer.stride + 1; - int c = layer.n; + int h,w,c; + if(layer.edge){ + h = (layer.h-1)/layer.stride + 1; + w = (layer.w-1)/layer.stride + 1; + }else{ + h = (layer.h - layer.size)/layer.stride+1; + w = (layer.h - layer.size)/layer.stride+1; + } + c = layer.n; return double_to_image(h,w,c,layer.output); } image get_convolutional_delta(convolutional_layer layer) { - int h = (layer.h-1)/layer.stride + 1; - int w = (layer.w-1)/layer.stride + 1; - int c = layer.n; + int h,w,c; + if(layer.edge){ + h = (layer.h-1)/layer.stride + 1; + w = (layer.w-1)/layer.stride + 1; + }else{ + h = (layer.h - layer.size)/layer.stride+1; + w = (layer.h - layer.size)/layer.stride+1; + } + c = layer.n; return double_to_image(h,w,c,layer.delta); } -convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activator) +convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation) { - printf("Convolutional Layer: %d x %d x %d image, %d filters\n", h,w,c,n); int i; + int out_h,out_w; convolutional_layer *layer = calloc(1, sizeof(convolutional_layer)); layer->h = h; layer->w = w; layer->c = c; layer->n = n; + layer->edge = 0; layer->stride = stride; layer->kernels = calloc(n, sizeof(image)); layer->kernel_updates = calloc(n, sizeof(image)); + layer->kernel_momentum = calloc(n, sizeof(image)); layer->biases = calloc(n, sizeof(double)); layer->bias_updates = calloc(n, sizeof(double)); + layer->bias_momentum = calloc(n, sizeof(double)); + double scale = 20./(size*size*c); for(i = 0; i < n; ++i){ - layer->biases[i] = .005; - layer->kernels[i] = make_random_kernel(size, c); - layer->kernel_updates[i] = make_random_kernel(size, c); + //layer->biases[i] = rand_normal()*scale + scale; + layer->biases[i] = 1; + layer->kernels[i] = make_random_kernel(size, c, scale); + layer->kernel_updates[i] = make_random_kernel(size, c, 0); + layer->kernel_momentum[i] = make_random_kernel(size, c, 0); } - layer->output = calloc(((h-1)/stride+1) * ((w-1)/stride+1) * n, sizeof(double)); - layer->delta = calloc(((h-1)/stride+1) * ((w-1)/stride+1) * n, sizeof(double)); + layer->size = 2*(size/2)+1; + if(layer->edge){ + out_h = (layer->h-1)/layer->stride + 1; + out_w = (layer->w-1)/layer->stride + 1; + }else{ + out_h = (layer->h - layer->size)/layer->stride+1; + out_w = (layer->h - layer->size)/layer->stride+1; + } + printf("Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); + layer->output = calloc(out_h * out_w * n, sizeof(double)); + layer->delta = calloc(out_h * out_w * n, sizeof(double)); layer->upsampled = make_image(h,w,n); + layer->activation = activation; - if(activator == SIGMOID){ - layer->activation = sigmoid_activation; - layer->gradient = sigmoid_gradient; - }else if(activator == RELU){ - layer->activation = relu_activation; - layer->gradient = relu_gradient; - }else if(activator == IDENTITY){ - layer->activation = identity_activation; - layer->gradient = identity_gradient; - } return layer; } @@ -59,13 +78,13 @@ void forward_convolutional_layer(const convolutional_layer layer, double *in) image output = get_convolutional_image(layer); int i,j; for(i = 0; i < layer.n; ++i){ - convolve(input, layer.kernels[i], layer.stride, i, output); + convolve(input, layer.kernels[i], layer.stride, i, output, layer.edge); } for(i = 0; i < output.c; ++i){ for(j = 0; j < output.h*output.w; ++j){ int index = i*output.h*output.w + j; output.data[index] += layer.biases[i]; - output.data[index] = layer.activation(output.data[index]); + output.data[index] = activate(output.data[index], layer.activation); } } } @@ -74,32 +93,29 @@ void backward_convolutional_layer(convolutional_layer layer, double *input, doub { int i; - image in_image = double_to_image(layer.h, layer.w, layer.c, input); image in_delta = double_to_image(layer.h, layer.w, layer.c, delta); image out_delta = get_convolutional_delta(layer); zero_image(in_delta); for(i = 0; i < layer.n; ++i){ - back_convolve(in_delta, layer.kernels[i], layer.stride, i, out_delta); - } - for(i = 0; i < layer.h*layer.w*layer.c; ++i){ - in_delta.data[i] *= layer.gradient(in_image.data[i]); + back_convolve(in_delta, layer.kernels[i], layer.stride, i, out_delta, layer.edge); } } -/* -void backpropagate_convolutional_layer_convolve(image input, convolutional_layer layer) +void backward_convolutional_layer2(convolutional_layer layer, double *input, double *delta) { + image in_delta = double_to_image(layer.h, layer.w, layer.c, delta); + image out_delta = get_convolutional_delta(layer); int i,j; for(i = 0; i < layer.n; ++i){ rotate_image(layer.kernels[i]); } - zero_image(input); - upsample_image(layer.output, layer.stride, layer.upsampled); - for(j = 0; j < input.c; ++j){ + zero_image(in_delta); + upsample_image(out_delta, layer.stride, layer.upsampled); + for(j = 0; j < in_delta.c; ++j){ for(i = 0; i < layer.n; ++i){ - two_d_convolve(layer.upsampled, i, layer.kernels[i], j, 1, input, j); + two_d_convolve(layer.upsampled, i, layer.kernels[i], j, 1, in_delta, j, layer.edge); } } @@ -107,34 +123,99 @@ void backpropagate_convolutional_layer_convolve(image input, convolutional_layer rotate_image(layer.kernels[i]); } } -*/ void learn_convolutional_layer(convolutional_layer layer, double *input) { int i; image in_image = double_to_image(layer.h, layer.w, layer.c, input); image out_delta = get_convolutional_delta(layer); + image out_image = get_convolutional_image(layer); + for(i = 0; i < out_image.h*out_image.w*out_image.c; ++i){ + out_delta.data[i] *= gradient(out_image.data[i], layer.activation); + } for(i = 0; i < layer.n; ++i){ - kernel_update(in_image, layer.kernel_updates[i], layer.stride, i, out_delta); + kernel_update(in_image, layer.kernel_updates[i], layer.stride, i, out_delta, layer.edge); layer.bias_updates[i] += avg_image_layer(out_delta, i); + //printf("%30.20lf\n", layer.bias_updates[i]); } } -void update_convolutional_layer(convolutional_layer layer, double step) +void update_convolutional_layer(convolutional_layer layer, double step, double momentum, double decay) { - return; + //step = .01; int i,j; for(i = 0; i < layer.n; ++i){ - layer.biases[i] += step*layer.bias_updates[i]; + layer.bias_momentum[i] = step*(layer.bias_updates[i]) + + momentum*layer.bias_momentum[i]; + layer.biases[i] += layer.bias_momentum[i]; + //layer.biases[i] = constrain(layer.biases[i],1.); layer.bias_updates[i] = 0; int pixels = layer.kernels[i].h*layer.kernels[i].w*layer.kernels[i].c; for(j = 0; j < pixels; ++j){ - layer.kernels[i].data[j] += step*layer.kernel_updates[i].data[j]; + layer.kernel_momentum[i].data[j] = step*(layer.kernel_updates[i].data[j] - decay*layer.kernels[i].data[j]) + + momentum*layer.kernel_momentum[i].data[j]; + layer.kernels[i].data[j] += layer.kernel_momentum[i].data[j]; + //layer.kernels[i].data[j] = constrain(layer.kernels[i].data[j], 1.); } zero_image(layer.kernel_updates[i]); } } +void visualize_convolutional_filters(convolutional_layer layer, char *window) +{ + int color = 1; + int border = 1; + int h,w,c; + int size = layer.size; + h = size; + w = (size + border) * layer.n - border; + c = layer.kernels[0].c; + if(c != 3 || !color){ + h = (h+border)*c - border; + c = 1; + } + + image filters = make_image(h,w,c); + int i,j; + for(i = 0; i < layer.n; ++i){ + int w_offset = i*(size+border); + image k = layer.kernels[i]; + image copy = copy_image(k); + /* + printf("Kernel %d - Bias: %f, Channels:",i,layer.biases[i]); + for(j = 0; j < k.c; ++j){ + double a = avg_image_layer(k, j); + printf("%f, ", a); + } + printf("\n"); + */ + normalize_image(copy); + for(j = 0; j < k.c; ++j){ + set_pixel(copy,0,0,j,layer.biases[i]); + } + if(c == 3 && color){ + embed_image(copy, filters, 0, w_offset); + } + else{ + for(j = 0; j < k.c; ++j){ + int h_offset = j*(size+border); + image layer = get_image_layer(k, j); + embed_image(layer, filters, h_offset, w_offset); + free_image(layer); + } + } + free_image(copy); + } + image delta = get_convolutional_delta(layer); + image dc = collapse_image_layers(delta, 1); + char buff[256]; + sprintf(buff, "%s: Delta", window); + show_image(dc, buff); + free_image(dc); + show_image(filters, window); + free_image(filters); +} + void visualize_convolutional_layer(convolutional_layer layer) { int i; diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index ab414ec0..135d9832 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -7,27 +7,31 @@ typedef struct { int h,w,c; int n; + int size; int stride; image *kernels; image *kernel_updates; + image *kernel_momentum; double *biases; double *bias_updates; + double *bias_momentum; image upsampled; double *delta; double *output; - double (* activation)(); - double (* gradient)(); + ACTIVATION activation; + int edge; } convolutional_layer; -convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activator); +convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation); void forward_convolutional_layer(const convolutional_layer layer, double *in); void backward_convolutional_layer(convolutional_layer layer, double *input, double *delta); void learn_convolutional_layer(convolutional_layer layer, double *input); -void update_convolutional_layer(convolutional_layer layer, double step); +void update_convolutional_layer(convolutional_layer layer, double step, double momentum, double decay); void backpropagate_convolutional_layer_convolve(image input, convolutional_layer layer); +void visualize_convolutional_filters(convolutional_layer layer, char *window); void visualize_convolutional_layer(convolutional_layer layer); image get_convolutional_image(convolutional_layer layer); diff --git a/src/data.c b/src/data.c index 7ef0d804..9e5791f3 100644 --- a/src/data.c +++ b/src/data.c @@ -30,13 +30,18 @@ list *get_paths(char *filename) return lines; } -int get_truth(char *path) +void fill_truth(char *path, char **labels, int k, double *truth) { - if(strstr(path, "dog")) return 1; - return 0; + int i; + memset(truth, 0, k*sizeof(double)); + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + } + } } -batch load_list(list *paths) +batch load_list(list *paths, char **labels, int k) { char *path; batch data = make_batch(paths->size, 2); @@ -45,16 +50,16 @@ batch load_list(list *paths) for(i = 0; i < data.n; ++i){ path = (char *)n->val; data.images[i] = load_image(path); - data.truth[i][0] = get_truth(path); + fill_truth(path, labels, k, data.truth[i]); n = n->next; } return data; } -batch get_all_data(char *filename) +batch get_all_data(char *filename, char **labels, int k) { list *paths = get_paths(filename); - batch b = load_list(paths); + batch b = load_list(paths, labels, k); free_list_contents(paths); free_list(paths); return b; @@ -71,7 +76,7 @@ void free_batch(batch b) free(b.truth); } -batch get_batch(char *filename, int curr, int total) +batch get_batch(char *filename, int curr, int total, char **labels, int k) { list *plist = get_paths(filename); char **paths = (char **)list_to_array(plist); @@ -81,7 +86,7 @@ batch get_batch(char *filename, int curr, int total) batch b = make_batch(end-start, 2); for(i = start; i < end; ++i){ b.images[i-start] = load_image(paths[i]); - b.truth[i-start][0] = get_truth(paths[i]); + fill_truth(paths[i], labels, k, b.truth[i-start]); } free_list_contents(plist); free_list(plist); @@ -89,7 +94,7 @@ batch get_batch(char *filename, int curr, int total) return b; } -batch random_batch(char *filename, int n) +batch random_batch(char *filename, int n, char **labels, int k) { list *plist = get_paths(filename); char **paths = (char **)list_to_array(plist); @@ -98,8 +103,10 @@ batch random_batch(char *filename, int n) for(i = 0; i < n; ++i){ int index = rand()%plist->size; b.images[i] = load_image(paths[index]); - normalize_image(b.images[i]); - b.truth[i][0] = get_truth(paths[index]); + //scale_image(b.images[i], 1./255.); + z_normalize_image(b.images[i]); + fill_truth(paths[index], labels, k, b.truth[i]); + //print_image(b.images[i]); } free_list_contents(plist); free_list(plist); diff --git a/src/data.h b/src/data.h index fbcb1443..c01384cb 100644 --- a/src/data.h +++ b/src/data.h @@ -9,9 +9,9 @@ typedef struct{ double **truth; } batch; -batch get_all_data(char *filename); -batch random_batch(char *filename, int n); -batch get_batch(char *filename, int curr, int total); +batch get_all_data(char *filename, char **labels, int k); +batch random_batch(char *filename, int n, char **labels, int k); +batch get_batch(char *filename, int curr, int total, char **labels, int k); void free_batch(batch b); diff --git a/src/image.c b/src/image.c index a509d32a..74b88325 100644 --- a/src/image.c +++ b/src/image.c @@ -1,4 +1,5 @@ #include "image.h" +#include "utils.h" #include int windows = 0; @@ -9,6 +10,39 @@ void subtract_image(image a, image b) for(i = 0; i < a.h*a.w*a.c; ++i) a.data[i] -= b.data[i]; } +void embed_image(image source, image dest, int h, int w) +{ + int i,j,k; + for(k = 0; k < source.c; ++k){ + for(i = 0; i < source.h; ++i){ + for(j = 0; j < source.w; ++j){ + double val = get_pixel(source, i,j,k); + set_pixel(dest, h+i, w+j, k, val); + } + } + } +} + +image collapse_image_layers(image source, int border) +{ + int h = source.h; + h = (h+border)*source.c - border; + image dest = make_image(h, source.w, 1); + int i; + for(i = 0; i < source.c; ++i){ + image layer = get_image_layer(source, i); + int h_offset = i*(source.h+border); + embed_image(layer, dest, h_offset, 0); + free_image(layer); + } + return dest; +} + +void z_normalize_image(image p) +{ + normalize_array(p.data, p.h*p.w*p.c); +} + void normalize_image(image p) { double *min = calloc(p.c, sizeof(double)); @@ -24,7 +58,7 @@ void normalize_image(image p) } } for(i = 0; i < p.c; ++i){ - if(max[i] - min[i] < .00001){ + if(max[i] - min[i] < .000000001){ min[i] = 0; max[i] = 1; } @@ -71,12 +105,13 @@ void show_image(image p, char *name) normalize_image(copy); char buff[256]; - sprintf(buff, "%s (%d)", name, windows); + //sprintf(buff, "%s (%d)", name, windows); + sprintf(buff, "%s", name); IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c); int step = disp->widthStep; cvNamedWindow(buff, CV_WINDOW_AUTOSIZE); - cvMoveWindow(buff, 100*(windows%10) + 200*(windows/10), 100*(windows%10)); + //cvMoveWindow(buff, 100*(windows%10) + 200*(windows/10), 100*(windows%10)); ++windows; for(i = 0; i < p.h; ++i){ for(j = 0; j < p.w; ++j){ @@ -85,9 +120,16 @@ void show_image(image p, char *name) } } } - if(disp->height < 100 || disp->width < 100){ + free_image(copy); + if(disp->height < 500 || disp->width < 500){ + int w = 1500; + int h = w*p.h/p.w; + if(h > 1000){ + h = 1000; + w = h*p.w/p.h; + } IplImage *buffer = disp; - disp = cvCreateImage(cvSize(100,100*p.h/p.w), buffer->depth, buffer->nChannels); + disp = cvCreateImage(cvSize(w, h), buffer->depth, buffer->nChannels); cvResize(buffer, disp, CV_INTER_NN); cvReleaseImage(&buffer); } @@ -107,6 +149,13 @@ void show_image_layers(image p, char *name) } } +void show_image_collapsed(image p, char *name) +{ + image c = collapse_image_layers(p, 1); + show_image(c, name); + free_image(c); +} + image make_empty_image(int h, int w, int c) { image out; @@ -157,16 +206,29 @@ image make_random_image(int h, int w, int c) image out = make_image(h,w,c); int i; for(i = 0; i < h*w*c; ++i){ - out.data[i] = (.5-(double)rand()/RAND_MAX); + out.data[i] = rand_normal(); } return out; } -image make_random_kernel(int size, int c) +void add_scalar_image(image m, double s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; +} + +void scale_image(image m, double s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; +} + +image make_random_kernel(int size, int c, double scale) { int pad; if((pad=(size%2==0))) ++size; image out = make_random_image(size,size,c); + scale_image(out, scale); int i,k; if(pad){ for(k = 0; k < out.c; ++k){ @@ -250,18 +312,29 @@ void add_pixel_extend(image m, int x, int y, int c, double val) add_pixel(m, x, y, c, val); } -void two_d_convolve(image m, int mc, image kernel, int kc, int stride, image out, int oc) +void two_d_convolve(image m, int mc, image kernel, int kc, int stride, image out, int oc, int edge) { int x,y,i,j; - for(x = 0; x < m.h; x += stride){ - for(y = 0; y < m.w; y += stride){ + int xstart, xend, ystart, yend; + if(edge){ + xstart = ystart = 0; + xend = m.h; + yend = m.w; + }else{ + xstart = kernel.h/2; + ystart = kernel.w/2; + xend = m.h-kernel.h/2; + yend = m.w - kernel.w/2; + } + for(x = xstart; x < xend; x += stride){ + for(y = ystart; y < yend; y += stride){ double sum = 0; for(i = 0; i < kernel.h; ++i){ for(j = 0; j < kernel.w; ++j){ sum += get_pixel(kernel, i, j, kc)*get_pixel_extend(m, x+i-kernel.h/2, y+j-kernel.w/2, mc); } } - add_pixel(out, x/stride, y/stride, oc, sum); + add_pixel(out, (x-xstart)/stride, (y-ystart)/stride, oc, sum); } } } @@ -280,13 +353,13 @@ double single_convolve(image m, image kernel, int x, int y) return sum; } -void convolve(image m, image kernel, int stride, int channel, image out) +void convolve(image m, image kernel, int stride, int channel, image out, int edge) { assert(m.c == kernel.c); int i; zero_channel(out, channel); for(i = 0; i < m.c; ++i){ - two_d_convolve(m, i, kernel, i, stride, out, channel); + two_d_convolve(m, i, kernel, i, stride, out, channel, edge); } /* int j; @@ -326,20 +399,32 @@ void single_update(image m, image update, int x, int y, double error) } } -void kernel_update(image m, image update, int stride, int channel, image out) +void kernel_update(image m, image update, int stride, int channel, image out, int edge) { assert(m.c == update.c); zero_image(update); - int i, j; - for(i = 0; i < m.h; i += stride){ - for(j = 0; j < m.w; j += stride){ - double error = get_pixel(out, i/stride, j/stride, channel); + int i, j, istart, jstart, iend, jend; + if(edge){ + istart = jstart = 0; + iend = m.h; + jend = m.w; + }else{ + istart = update.h/2; + jstart = update.w/2; + iend = m.h-update.h/2; + jend = m.w - update.w/2; + } + for(i = istart; i < iend; i += stride){ + for(j = jstart; j < jend; j += stride){ + double error = get_pixel(out, (i-istart)/stride, (j-jstart)/stride, channel); single_update(m, update, i, j, error); } } + /* for(i = 0; i < update.h*update.w*update.c; ++i){ update.data[i] /= (m.h/stride)*(m.w/stride); } + */ } void single_back_convolve(image m, image kernel, int x, int y, double val) @@ -355,18 +440,35 @@ void single_back_convolve(image m, image kernel, int x, int y, double val) } } -void back_convolve(image m, image kernel, int stride, int channel, image out) +void back_convolve(image m, image kernel, int stride, int channel, image out, int edge) { assert(m.c == kernel.c); - int i, j; - for(i = 0; i < m.h; i += stride){ - for(j = 0; j < m.w; j += stride){ - double val = get_pixel(out, i/stride, j/stride, channel); + int i, j, istart, jstart, iend, jend; + if(edge){ + istart = jstart = 0; + iend = m.h; + jend = m.w; + }else{ + istart = kernel.h/2; + jstart = kernel.w/2; + iend = m.h-kernel.h/2; + jend = m.w - kernel.w/2; + } + for(i = istart; i < iend; i += stride){ + for(j = jstart; j < jend; j += stride){ + double val = get_pixel(out, (i-istart)/stride, (j-jstart)/stride, channel); single_back_convolve(m, kernel, i, j, val); } } } +void print_image(image m) +{ + int i; + for(i =0 ; i < m.h*m.w*m.c; ++i) printf("%lf, ", m.data[i]); + printf("\n"); +} + void free_image(image m) { free(m.data); diff --git a/src/image.h b/src/image.h index 3117deda..18658575 100644 --- a/src/image.h +++ b/src/image.h @@ -10,20 +10,27 @@ typedef struct { double *data; } image; +void scale_image(image m, double s); +void add_scalar_image(image m, double s); void normalize_image(image p); +void z_normalize_image(image p); void threshold_image(image p, double t); void zero_image(image m); void rotate_image(image m); void subtract_image(image a, image b); double avg_image_layer(image m, int l); +void embed_image(image source, image dest, int h, int w); +image collapse_image_layers(image source, int border); void show_image(image p, char *name); void show_image_layers(image p, char *name); +void show_image_collapsed(image p, char *name); +void print_image(image m); image make_image(int h, int w, int c); image make_empty_image(int h, int w, int c); image make_random_image(int h, int w, int c); -image make_random_kernel(int size, int c); +image make_random_kernel(int size, int c, double scale); image double_to_image(int h, int w, int c, double *data); image copy_image(image p); image load_image(char *filename); @@ -35,11 +42,11 @@ void set_pixel(image m, int x, int y, int c, double val); image get_image_layer(image m, int l); -void two_d_convolve(image m, int mc, image kernel, int kc, int stride, image out, int oc); +void two_d_convolve(image m, int mc, image kernel, int kc, int stride, image out, int oc, int edge); void upsample_image(image m, int stride, image out); -void convolve(image m, image kernel, int stride, int channel, image out); -void back_convolve(image m, image kernel, int stride, int channel, image out); -void kernel_update(image m, image update, int stride, int channel, image out); +void convolve(image m, image kernel, int stride, int channel, image out, int edge); +void back_convolve(image m, image kernel, int stride, int channel, image out, int edge); +void kernel_update(image m, image update, int stride, int channel, image out, int edge); void free_image(image m); #endif diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index f58a22f9..5a82e0b2 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -9,6 +9,14 @@ image get_maxpool_image(maxpool_layer layer) return double_to_image(h,w,c,layer.output); } +image get_maxpool_delta(maxpool_layer layer) +{ + int h = (layer.h-1)/layer.stride + 1; + int w = (layer.w-1)/layer.stride + 1; + int c = layer.c; + return double_to_image(h,w,c,layer.delta); +} + maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride) { printf("Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride); @@ -39,3 +47,25 @@ void forward_maxpool_layer(const maxpool_layer layer, double *in) } } +void backward_maxpool_layer(const maxpool_layer layer, double *in, double *delta) +{ + image input = double_to_image(layer.h, layer.w, layer.c, in); + image input_delta = double_to_image(layer.h, layer.w, layer.c, delta); + image output_delta = get_maxpool_delta(layer); + image output = get_maxpool_image(layer); + int i,j,k; + for(k = 0; k < input.c; ++k){ + for(i = 0; i < input.h; ++i){ + for(j = 0; j < input.w; ++j){ + double val = get_pixel(input, i, j, k); + double cur = get_pixel(output, i/layer.stride, j/layer.stride, k); + double d = get_pixel(output_delta, i/layer.stride, j/layer.stride, k); + if(val == cur) { + set_pixel(input_delta, i, j, k, d); + } + else set_pixel(input_delta, i, j, k, 0); + } + } + } +} + diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index 04fb4b4a..0afe68a7 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -13,6 +13,7 @@ typedef struct { image get_maxpool_image(maxpool_layer layer); maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride); void forward_maxpool_layer(const maxpool_layer layer, double *in); +void backward_maxpool_layer(const maxpool_layer layer, double *in, double *delta); #endif diff --git a/src/network.c b/src/network.c index a77d6074..cce673c2 100644 --- a/src/network.c +++ b/src/network.c @@ -2,10 +2,12 @@ #include "network.h" #include "image.h" #include "data.h" +#include "utils.h" #include "connected_layer.h" #include "convolutional_layer.h" #include "maxpool_layer.h" +#include "softmax_layer.h" network make_network(int n) { @@ -30,6 +32,11 @@ void forward_network(network net, double *input) forward_connected_layer(layer, input); input = layer.output; } + else if(net.types[i] == SOFTMAX){ + softmax_layer layer = *(softmax_layer *)net.layers[i]; + forward_softmax_layer(layer, input); + input = layer.output; + } else if(net.types[i] == MAXPOOL){ maxpool_layer layer = *(maxpool_layer *)net.layers[i]; forward_maxpool_layer(layer, input); @@ -44,14 +51,17 @@ void update_network(network net, double step) for(i = 0; i < net.n; ++i){ if(net.types[i] == CONVOLUTIONAL){ convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - update_convolutional_layer(layer, step); + update_convolutional_layer(layer, step, 0.9, .01); } else if(net.types[i] == MAXPOOL){ //maxpool_layer layer = *(maxpool_layer *)net.layers[i]; } + else if(net.types[i] == SOFTMAX){ + //maxpool_layer layer = *(maxpool_layer *)net.layers[i]; + } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; - update_connected_layer(layer, step, .3, 0); + update_connected_layer(layer, step, .9, 0); } } } @@ -64,6 +74,9 @@ double *get_network_output_layer(network net, int i) } else if(net.types[i] == MAXPOOL){ maxpool_layer layer = *(maxpool_layer *)net.layers[i]; return layer.output; + } else if(net.types[i] == SOFTMAX){ + softmax_layer layer = *(softmax_layer *)net.layers[i]; + return layer.output; } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; return layer.output; @@ -83,6 +96,9 @@ double *get_network_delta_layer(network net, int i) } else if(net.types[i] == MAXPOOL){ maxpool_layer layer = *(maxpool_layer *)net.layers[i]; return layer.delta; + } else if(net.types[i] == SOFTMAX){ + softmax_layer layer = *(softmax_layer *)net.layers[i]; + return layer.delta; } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; return layer.delta; @@ -114,7 +130,12 @@ void learn_network(network net, double *input) if(i != 0) backward_convolutional_layer(layer, prev_input, prev_delta); } else if(net.types[i] == MAXPOOL){ - //maxpool_layer layer = *(maxpool_layer *)net.layers[i]; + maxpool_layer layer = *(maxpool_layer *)net.layers[i]; + if(i != 0) backward_maxpool_layer(layer, prev_input, prev_delta); + } + else if(net.types[i] == SOFTMAX){ + softmax_layer layer = *(softmax_layer *)net.layers[i]; + if(i != 0) backward_softmax_layer(layer, prev_input, prev_delta); } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; @@ -130,19 +151,33 @@ void train_network_batch(network net, batch b) int k = get_network_output_size(net); int correct = 0; for(i = 0; i < b.n; ++i){ + show_image(b.images[i], "Input"); forward_network(net, b.images[i].data); image o = get_network_image(net); + if(o.h) show_image_collapsed(o, "Output"); double *output = get_network_output(net); double *delta = get_network_delta(net); + int max_k = 0; + double max = 0; for(j = 0; j < k; ++j){ - //printf("%f %f\n", b.truth[i][j], output[j]); delta[j] = b.truth[i][j]-output[j]; - if(fabs(delta[j]) < .5) ++correct; - //printf("%f\n", output[j]); + if(output[j] > max) { + max = output[j]; + max_k = j; + } } + if(b.truth[i][max_k]) ++correct; + printf("%f\n", (double)correct/(i+1)); learn_network(net, b.images[i].data); - update_network(net, .00001); + update_network(net, .001); + if(i%100 == 0){ + visualize_network(net); + cvWaitKey(100); + } } + visualize_network(net); + print_network(net); + cvWaitKey(100); printf("Accuracy: %f\n", (double)correct/b.n); } @@ -162,6 +197,10 @@ int get_network_output_size_layer(network net, int i) connected_layer layer = *(connected_layer *)net.layers[i]; return layer.outputs; } + else if(net.types[i] == SOFTMAX){ + softmax_layer layer = *(softmax_layer *)net.layers[i]; + return layer.inputs; + } return 0; } @@ -181,7 +220,7 @@ image get_network_image_layer(network net, int i) maxpool_layer layer = *(maxpool_layer *)net.layers[i]; return get_maxpool_image(layer); } - return make_image(0,0,0); + return make_empty_image(0,0,0); } image get_network_image(network net) @@ -191,17 +230,56 @@ image get_network_image(network net) image m = get_network_image_layer(net, i); if(m.h != 0) return m; } - return make_image(1,1,1); + return make_empty_image(0,0,0); } void visualize_network(network net) { int i; - for(i = 0; i < 1; ++i){ + char buff[256]; + for(i = 0; i < net.n; ++i){ + sprintf(buff, "Layer %d", i); if(net.types[i] == CONVOLUTIONAL){ convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - visualize_convolutional_layer(layer); + visualize_convolutional_filters(layer, buff); } } } +void print_network(network net) +{ + int i,j; + for(i = 0; i < net.n; ++i){ + double *output; + int n = 0; + if(net.types[i] == CONVOLUTIONAL){ + convolutional_layer layer = *(convolutional_layer *)net.layers[i]; + output = layer.output; + image m = get_convolutional_image(layer); + n = m.h*m.w*m.c; + } + else if(net.types[i] == MAXPOOL){ + maxpool_layer layer = *(maxpool_layer *)net.layers[i]; + output = layer.output; + image m = get_maxpool_image(layer); + n = m.h*m.w*m.c; + } + else if(net.types[i] == CONNECTED){ + connected_layer layer = *(connected_layer *)net.layers[i]; + output = layer.output; + n = layer.outputs; + } + else if(net.types[i] == SOFTMAX){ + softmax_layer layer = *(softmax_layer *)net.layers[i]; + output = layer.output; + n = layer.inputs; + } + double mean = mean_array(output, n); + double vari = variance_array(output, n); + printf("Layer %d - Mean: %f, Variance: %f\n",i,mean, vari); + if(n > 100) n = 100; + for(j = 0; j < n; ++j) printf("%f, ", output[j]); + if(n == 100)printf(".....\n"); + printf("\n"); + } +} diff --git a/src/network.h b/src/network.h index 10fa6c55..c655c91c 100644 --- a/src/network.h +++ b/src/network.h @@ -8,7 +8,8 @@ typedef enum { CONVOLUTIONAL, CONNECTED, - MAXPOOL + MAXPOOL, + SOFTMAX } LAYER_TYPE; typedef struct { @@ -30,6 +31,8 @@ int get_network_output_size_layer(network net, int i); int get_network_output_size(network net); image get_network_image(network net); image get_network_image_layer(network net, int i); +void print_network(network net); +void visualize_network(network net); #endif diff --git a/src/parser.c b/src/parser.c index 75416206..dc1db2ba 100644 --- a/src/parser.c +++ b/src/parser.c @@ -7,6 +7,7 @@ #include "convolutional_layer.h" #include "connected_layer.h" #include "maxpool_layer.h" +#include "softmax_layer.h" #include "list.h" #include "option_list.h" #include "utils.h" @@ -19,6 +20,7 @@ typedef struct{ int is_convolutional(section *s); int is_connected(section *s); int is_maxpool(section *s); +int is_softmax(section *s); list *read_cfg(char *filename); @@ -69,6 +71,17 @@ network parse_network_cfg(char *filename) net.types[count] = CONNECTED; net.layers[count] = layer; option_unused(options); + }else if(is_softmax(s)){ + int input; + if(count == 0){ + input = option_find_int(options, "input",1); + }else{ + input = get_network_output_size_layer(net, count-1); + } + softmax_layer *layer = make_softmax_layer(input); + net.types[count] = SOFTMAX; + net.layers[count] = layer; + option_unused(options); }else if(is_maxpool(s)){ int h,w,c; int stride = option_find_int(options, "stride",1); @@ -113,6 +126,12 @@ int is_maxpool(section *s) || strcmp(s->type, "[maxpool]")==0); } +int is_softmax(section *s) +{ + return (strcmp(s->type, "[soft]")==0 + || strcmp(s->type, "[softmax]")==0); +} + int read_option(char *s, list *options) { int i; diff --git a/src/softmax_layer.c b/src/softmax_layer.c new file mode 100644 index 00000000..28696b70 --- /dev/null +++ b/src/softmax_layer.c @@ -0,0 +1,35 @@ +#include "softmax_layer.h" +#include +#include +#include + +softmax_layer *make_softmax_layer(int inputs) +{ + printf("Softmax Layer: %d inputs\n", inputs); + softmax_layer *layer = calloc(1, sizeof(softmax_layer)); + layer->inputs = inputs; + layer->output = calloc(inputs, sizeof(double)); + layer->delta = calloc(inputs, sizeof(double)); + return layer; +} + +void forward_softmax_layer(const softmax_layer layer, double *input) +{ + int i; + double sum = 0; + for(i = 0; i < layer.inputs; ++i){ + sum += exp(input[i]); + } + for(i = 0; i < layer.inputs; ++i){ + layer.output[i] = exp(input[i])/sum; + } +} + +void backward_softmax_layer(const softmax_layer layer, double *input, double *delta) +{ + int i; + for(i = 0; i < layer.inputs; ++i){ + delta[i] = layer.delta[i]; + } +} + diff --git a/src/softmax_layer.h b/src/softmax_layer.h new file mode 100644 index 00000000..1a0d7605 --- /dev/null +++ b/src/softmax_layer.h @@ -0,0 +1,14 @@ +#ifndef SOFTMAX_LAYER_H +#define SOFTMAX_LAYER_H + +typedef struct { + int inputs; + double *delta; + double *output; +} softmax_layer; + +softmax_layer *make_softmax_layer(int inputs); +void forward_softmax_layer(const softmax_layer layer, double *input); +void backward_softmax_layer(const softmax_layer layer, double *input, double *delta); + +#endif diff --git a/src/tests.c b/src/tests.c index 65811e9a..722de1ae 100644 --- a/src/tests.c +++ b/src/tests.c @@ -6,6 +6,7 @@ #include "parser.h" #include "data.h" #include "matrix.h" +#include "utils.h" #include #include @@ -21,7 +22,7 @@ void test_convolve() int i; clock_t start = clock(), end; for(i = 0; i < 1000; ++i){ - convolve(dog, kernel, 1, 0, edge); + convolve(dog, kernel, 1, 0, edge, 1); } end = clock(); printf("Convolutions: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC); @@ -57,6 +58,61 @@ void test_convolutional_layer() show_image_layers(get_maxpool_image(mlayer), "Test Maxpool Layer"); } +void verify_convolutional_layer() +{ + srand(0); + int i; + int n = 1; + int stride = 1; + int size = 3; + double eps = .00000001; + image test = make_random_image(5,5, 1); + convolutional_layer layer = *make_convolutional_layer(test.h,test.w,test.c, n, size, stride, RELU); + image out = get_convolutional_image(layer); + double **jacobian = calloc(test.h*test.w*test.c, sizeof(double)); + + forward_convolutional_layer(layer, test.data); + image base = copy_image(out); + + for(i = 0; i < test.h*test.w*test.c; ++i){ + test.data[i] += eps; + forward_convolutional_layer(layer, test.data); + image partial = copy_image(out); + subtract_image(partial, base); + scale_image(partial, 1/eps); + jacobian[i] = partial.data; + test.data[i] -= eps; + } + double **jacobian2 = calloc(out.h*out.w*out.c, sizeof(double)); + image in_delta = make_image(test.h, test.w, test.c); + image out_delta = get_convolutional_delta(layer); + for(i = 0; i < out.h*out.w*out.c; ++i){ + out_delta.data[i] = 1; + backward_convolutional_layer2(layer, test.data, in_delta.data); + image partial = copy_image(in_delta); + jacobian2[i] = partial.data; + out_delta.data[i] = 0; + } + int j; + double *j1 = calloc(test.h*test.w*test.c*out.h*out.w*out.c, sizeof(double)); + double *j2 = calloc(test.h*test.w*test.c*out.h*out.w*out.c, sizeof(double)); + for(i = 0; i < test.h*test.w*test.c; ++i){ + for(j =0 ; j < out.h*out.w*out.c; ++j){ + j1[i*out.h*out.w*out.c + j] = jacobian[i][j]; + j2[i*out.h*out.w*out.c + j] = jacobian2[j][i]; + printf("%f %f\n", jacobian[i][j], jacobian2[j][i]); + } + } + + + image mj1 = double_to_image(test.w*test.h*test.c, out.w*out.h*out.c, 1, j1); + image mj2 = double_to_image(test.w*test.h*test.c, out.w*out.h*out.c, 1, j2); + printf("%f %f\n", avg_image_layer(mj1,0), avg_image_layer(mj2,0)); + show_image(mj1, "forward jacobian"); + show_image(mj2, "backward jacobian"); + +} + void test_load() { image dog = load_image("dog.jpg"); @@ -119,30 +175,26 @@ void test_parser() void test_data() { - batch train = random_batch("train_paths.txt", 101); + char *labels[] = {"cat","dog"}; + batch train = random_batch("train_paths.txt", 101,labels, 2); show_image(train.images[0], "Test Data Loading"); show_image(train.images[100], "Test Data Loading"); show_image(train.images[10], "Test Data Loading"); free_batch(train); } -void test_train() +void test_full() { - network net = parse_network_cfg("test.cfg"); + network net = parse_network_cfg("full.cfg"); srand(0); - //visualize_network(net); - int i = 1000; - //while(1){ - while(i > 0){ - batch train = random_batch("train_paths.txt", 100); + int i = 0; + char *labels[] = {"cat","dog"}; + while(i++ < 1000 || 1){ + batch train = random_batch("train_paths.txt", 1000, labels, 2); train_network_batch(net, train); - //show_image_layers(get_network_image(net), "hey"); - //visualize_network(net); - //cvWaitKey(0); free_batch(train); - --i; - } - //} + printf("Round %d\n", i); + } } double error_network(network net, matrix m, double *truth) @@ -158,9 +210,90 @@ double error_network(network net, matrix m, double *truth) return (double)correct/m.rows; } -void classify_random_filters() +double **one_hot(double *a, int n, int k) { - network net = parse_network_cfg("random_filter_finish.cfg"); + int i; + double **t = calloc(n, sizeof(double*)); + for(i = 0; i < n; ++i){ + t[i] = calloc(k, sizeof(double)); + int index = (int)a[i]; + t[i][index] = 1; + } + return t; +} + +void test_nist() +{ + network net = parse_network_cfg("nist.cfg"); + matrix m = csv_to_matrix("images/nist_train.csv"); + matrix ho = hold_out_matrix(&m, 3000); + double *truth_1d = pop_column(&m, 0); + double **truth = one_hot(truth_1d, m.rows, 10); + double *ho_truth_1d = pop_column(&ho, 0); + double **ho_truth = one_hot(ho_truth_1d, ho.rows, 10); + int i,j; + clock_t start = clock(), end; + int count = 0; + double lr = .0001; + while(++count <= 3000000){ + //lr *= .99; + int index = 0; + int correct = 0; + for(i = 0; i < 1000; ++i){ + index = rand()%m.rows; + normalize_array(m.vals[index], 28*28); + forward_network(net, m.vals[index]); + double *out = get_network_output(net); + double *delta = get_network_delta(net); + int max_i = 0; + double max = out[0]; + for(j = 0; j < 10; ++j){ + delta[j] = truth[index][j]-out[j]; + if(out[j] > max){ + max = out[j]; + max_i = j; + } + } + if(truth[index][max_i]) ++correct; + learn_network(net, m.vals[index]); + update_network(net, lr); + } + print_network(net); + image input = double_to_image(28,28,1, m.vals[index]); + show_image(input, "Input"); + image o = get_network_image(net); + show_image_collapsed(o, "Output"); + visualize_network(net); + cvWaitKey(100); + //double test_acc = error_network(net, m, truth); + //double valid_acc = error_network(net, ho, ho_truth); + //printf("%f, %f\n", test_acc, valid_acc); + fprintf(stderr, "%5d: %f %f\n",count, (double)correct/1000, lr); + //if(valid_acc > .70) break; + } + end = clock(); + printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC); +} + +void test_kernel_update() +{ + srand(0); + double delta[] = {.1}; + double input[] = {.3, .5, .3, .5, .5, .5, .5, .0, .5}; + double kernel[] = {1,2,3,4,5,6,7,8,9}; + convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, IDENTITY); + layer.kernels[0].data = kernel; + layer.delta = delta; + learn_convolutional_layer(layer, input); + print_image(layer.kernels[0]); + print_image(get_convolutional_delta(layer)); + print_image(layer.kernel_updates[0]); + +} + +void test_random_classify() +{ + network net = parse_network_cfg("connected.cfg"); matrix m = csv_to_matrix("train.csv"); matrix ho = hold_out_matrix(&m, 2500); double *truth = pop_column(&m, 0); @@ -181,7 +314,7 @@ void classify_random_filters() // printf("%f\n", delta[0]); //printf("%f %f\n", truth[index], out[0]); learn_network(net, m.vals[index]); - update_network(net, .000005); + update_network(net, .00001); } double test_acc = error_network(net, m, truth); double valid_acc = error_network(net, ho, ho_truth); @@ -203,15 +336,16 @@ void classify_random_filters() printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC); } -void test_random_filters() +void test_random_preprocess() { - FILE *file = fopen("test.csv", "w"); + FILE *file = fopen("train.csv", "w"); + char *labels[] = {"cat","dog"}; int i,j,k; srand(0); - network net = parse_network_cfg("test_random_filter.cfg"); + network net = parse_network_cfg("convolutional.cfg"); for(i = 0; i < 100; ++i){ printf("%d\n", i); - batch part = get_batch("test_paths.txt", i, 100); + batch part = get_batch("train_paths.txt", i, 100, labels, 2); for(j = 0; j < part.n; ++j){ forward_network(net, part.images[j].data); double *out = get_network_output(net); @@ -227,9 +361,11 @@ void test_random_filters() int main() { - //classify_random_filters(); - //test_random_filters(); - test_train(); + //test_kernel_update(); + //test_nist(); + test_full(); + //test_random_preprocess(); + //test_random_classify(); //test_parser(); //test_backpropagate(); //test_ann(); @@ -239,6 +375,7 @@ int main() //test_load(); //test_network(); //test_convolutional_layer(); + //verify_convolutional_layer(); //test_color(); cvWaitKey(0); return 0; diff --git a/src/utils.c b/src/utils.c index 9848d080..8229b2d0 100644 --- a/src/utils.c +++ b/src/utils.c @@ -143,5 +143,47 @@ double *parse_fields(char *line, int n) return field; } +double mean_array(double *a, int n) +{ + int i; + double sum = 0; + for(i = 0; i < n; ++i) sum += a[i]; + return sum/n; +} +double variance_array(double *a, int n) +{ + int i; + double sum = 0; + double mean = mean_array(a, n); + for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); + double variance = sum/n; + return variance; +} +double constrain(double a, double max) +{ + if(a > abs(max)) return abs(max); + if(a < -abs(max)) return -abs(max); + return a; +} + +void normalize_array(double *a, int n) +{ + int i; + double mu = mean_array(a,n); + double sigma = sqrt(variance_array(a,n)); + for(i = 0; i < n; ++i){ + a[i] = (a[i] - mu)/sigma; + } + mu = mean_array(a,n); + sigma = sqrt(variance_array(a,n)); +} + +double rand_normal() +{ + int i; + double sum= 0; + for(i = 0; i < 12; ++i) sum += (double)rand()/RAND_MAX; + return sum-6.; +} diff --git a/src/utils.h b/src/utils.h index 87ef428d..35217782 100644 --- a/src/utils.h +++ b/src/utils.h @@ -14,5 +14,10 @@ list *parse_csv_line(char *line); char *copy_string(char *s); int count_fields(char *line); double *parse_fields(char *line, int n); +void normalize_array(double *a, int n); +double constrain(double a, double max); +double rand_normal(); +double mean_array(double *a, int n); +double variance_array(double *a, int n); #endif diff --git a/test.cfg b/test.cfg index 84b6c82a..fdbcc107 100644 --- a/test.cfg +++ b/test.cfg @@ -3,30 +3,30 @@ width=200 height=200 channels=3 filters=10 -size=3 -stride=2 +size=15 +stride=16 activation=relu -[maxpool] -stride=2 +#[maxpool] +#stride=2 -[conv] -filters=10 -size=10 -stride=2 -activation=relu +#[conv] +#filters=10 +#size=10 +#stride=4 +#activation=relu -[maxpool] -stride=2 +#[maxpool] +#stride=2 -[conv] -filters=10 -size=10 -stride=2 -activation=relu +#[conv] +#filters=10 +#size=10 +#stride=4 +#activation=relu -[maxpool] -stride=2 +#[maxpool] +#stride=2 [conn] output = 10 diff --git a/test_parser.cfg b/test_parser.cfg deleted file mode 100644 index 788d71a3..00000000 --- a/test_parser.cfg +++ /dev/null @@ -1,8 +0,0 @@ -[conn] -input=1 -output = 20 -activation=sigmoid - -[conn] -output = 1 -activation=sigmoid diff --git a/test_random_filter.cfg b/test_random_filter.cfg deleted file mode 100644 index bfd7f0c1..00000000 --- a/test_random_filter.cfg +++ /dev/null @@ -1,29 +0,0 @@ -[conv] -width=200 -height=200 -channels=3 -filters=10 -size=15 -stride=2 -activation=relu - -[maxpool] -stride=2 - -[conv] -filters=10 -size=5 -stride=1 -activation=relu - -[maxpool] -stride=2 - -[conv] -filters=10 -size=3 -stride=1 -activation=relu - -[maxpool] -stride=2