Connected layers work forward and backward!

This commit is contained in:
Joseph Redmon 2013-11-06 10:37:37 -08:00
parent 41bcfac86f
commit 9b1774bd39
12 changed files with 248 additions and 70 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
*.csv *.csv
images/ images/
opencv/ opencv/
convnet/
cnn cnn
# OS Generated # # OS Generated #

View File

@ -1,10 +1,10 @@
CC=gcc CC=gcc
CFLAGS=-Wall `pkg-config --cflags opencv` -O3 -flto -ffast-math CFLAGS=-Wall `pkg-config --cflags opencv` -O3 -flto -ffast-math
#CFLAGS=-Wall `pkg-config --cflags opencv` -O0 -g CFLAGS=-Wall `pkg-config --cflags opencv` -O0 -g
LDFLAGS=`pkg-config --libs opencv` -lm LDFLAGS=`pkg-config --libs opencv` -lm
VPATH=./src/ VPATH=./src/
OBJ=network.o image.o tests.o convolutional_layer.o connected_layer.o maxpool_layer.o OBJ=network.o image.o tests.o convolutional_layer.o connected_layer.o maxpool_layer.o activations.o
all: cnn all: cnn

32
src/activations.c Normal file
View File

@ -0,0 +1,32 @@
#include "activations.h"
#include <math.h>
/* Identity activation: hands the value back unchanged. */
double identity_activation(double x)
{
    double same = x;
    return same;
}
/* Derivative of the identity activation: always 1, whatever x is. */
double identity_gradient(double x)
{
    (void)x; /* the slope does not depend on the argument */
    return 1.0;
}
/* Rectified linear unit: x for x > 0, otherwise x scaled by zero. */
double relu_activation(double x)
{
    if (x > 0) {
        return x;
    }
    /* Scale by zero rather than returning a literal 0 so that the
     * -0.0 and NaN results of the x*(x>0) form are kept. */
    return x * 0.0;
}
/*
 * Derivative of the ReLU activation.
 *
 * Returns 1 for x > 0 and 0 otherwise. The comparison is strict on purpose:
 * relu_activation maps every non-positive input to 0, and the gradient is
 * evaluated on activated values (the same convention sigmoid_gradient's
 * x*(1-x) form relies on). A ReLU output is never negative, so a
 * non-strict test (x >= 0) would report a slope of 1 for every unit,
 * including the ones that were clamped to 0.
 */
double relu_gradient(double x)
{
    return (x > 0);
}
double sigmoid_activation(double x)
{
return 1./(1.+exp(-x));
}
/*
 * Sigmoid derivative written in terms of the sigmoid's own value:
 * computes x*(1-x), so x is expected to be an already-activated value.
 */
double sigmoid_gradient(double x)
{
    double complement = 1. - x;
    return x * complement;
}

10
src/activations.h Normal file
View File

@ -0,0 +1,10 @@
#ifndef ACTIVATIONS_H
#define ACTIVATIONS_H

/*
 * Selects which activation/gradient pair a layer uses.
 * The include guard lets this header be pulled in more than once per
 * translation unit (it is included by connected_layer.h) without
 * redefining the enum.
 */
typedef enum{
    SIGMOID, RELU, IDENTITY
}ACTIVATOR_TYPE;

/* Rectified linear unit and its derivative. */
double relu_activation(double x);
double relu_gradient(double x);

/* Logistic sigmoid 1/(1+e^-x); the gradient is computed as x*(1-x). */
double sigmoid_activation(double x);
double sigmoid_gradient(double x);

/* Identity (returns x) and its constant derivative (returns 1). */
double identity_activation(double x);
double identity_gradient(double x);

#endif

View File

@ -1,19 +1,10 @@
#include "connected_layer.h" #include "connected_layer.h"
#include <math.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
double activation(double x) connected_layer make_connected_layer(int inputs, int outputs, ACTIVATOR_TYPE activator)
{
return x*(x>0);
}
double gradient(double x)
{
return (x>=0);
}
connected_layer make_connected_layer(int inputs, int outputs)
{ {
int i; int i;
connected_layer layer; connected_layer layer;
@ -32,6 +23,17 @@ connected_layer make_connected_layer(int inputs, int outputs)
for(i = 0; i < outputs; ++i) for(i = 0; i < outputs; ++i)
layer.biases[i] = (double)rand()/RAND_MAX; layer.biases[i] = (double)rand()/RAND_MAX;
if(activator == SIGMOID){
layer.activation = sigmoid_activation;
layer.gradient = sigmoid_gradient;
}else if(activator == RELU){
layer.activation = relu_activation;
layer.gradient = relu_gradient;
}else if(activator == IDENTITY){
layer.activation = identity_activation;
layer.gradient = identity_gradient;
}
return layer; return layer;
} }
@ -41,39 +43,16 @@ void run_connected_layer(double *input, connected_layer layer)
for(i = 0; i < layer.outputs; ++i){ for(i = 0; i < layer.outputs; ++i){
layer.output[i] = layer.biases[i]; layer.output[i] = layer.biases[i];
for(j = 0; j < layer.inputs; ++j){ for(j = 0; j < layer.inputs; ++j){
layer.output[i] += input[j]*layer.weights[i*layer.outputs + j]; layer.output[i] += input[j]*layer.weights[i*layer.inputs + j];
} }
layer.output[i] = activation(layer.output[i]); layer.output[i] = layer.activation(layer.output[i]);
} }
} }
void backpropagate_connected_layer(double *input, connected_layer layer) void learn_connected_layer(double *input, connected_layer layer)
{ {
int i, j; calculate_update_connected_layer(input, layer);
double *old_input = calloc(layer.inputs, sizeof(double)); backpropagate_connected_layer(input, layer);
memcpy(old_input, input, layer.inputs*sizeof(double));
memset(input, 0, layer.inputs*sizeof(double));
for(i = 0; i < layer.outputs; ++i){
for(j = 0; j < layer.inputs; ++j){
input[j] += layer.output[i]*layer.weights[i*layer.outputs + j];
}
}
for(j = 0; j < layer.inputs; ++j){
input[j] = input[j]*gradient(old_input[j]);
}
free(old_input);
}
void calculate_updates_connected_layer(double *input, connected_layer layer)
{
int i, j;
for(i = 0; i < layer.outputs; ++i){
layer.bias_updates[i] += layer.output[i];
for(j = 0; j < layer.inputs; ++j){
layer.weight_updates[i*layer.outputs + j] += layer.output[i]*input[j];
}
}
} }
void update_connected_layer(connected_layer layer, double step) void update_connected_layer(connected_layer layer, double step)
@ -82,11 +61,36 @@ void update_connected_layer(connected_layer layer, double step)
for(i = 0; i < layer.outputs; ++i){ for(i = 0; i < layer.outputs; ++i){
layer.biases[i] += step*layer.bias_updates[i]; layer.biases[i] += step*layer.bias_updates[i];
for(j = 0; j < layer.inputs; ++j){ for(j = 0; j < layer.inputs; ++j){
int index = i*layer.outputs+j; int index = i*layer.inputs+j;
layer.weights[index] = layer.weight_updates[index]; layer.weights[index] += step*layer.weight_updates[index];
} }
} }
memset(layer.bias_updates, 0, layer.outputs*sizeof(double)); memset(layer.bias_updates, 0, layer.outputs*sizeof(double));
memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(double)); memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(double));
} }
/*
 * Accumulate pending parameter updates for one example.
 *
 * For every output unit, adds layer.output[o] to bias_updates[o] and
 * layer.output[o]*input[k] to the matching weight update. Weights are laid
 * out row-major with one row of layer.inputs entries per output unit.
 * NOTE(review): layer.output is presumably expected to hold the error
 * signal rather than the forward activation when this runs - confirm
 * against the caller.
 */
void calculate_update_connected_layer(double *input, connected_layer layer)
{
    int o, k;
    for(o = 0; o < layer.outputs; ++o){
        const double delta = layer.output[o];
        double *row = layer.weight_updates + o*layer.inputs;
        layer.bias_updates[o] += delta;
        for(k = 0; k < layer.inputs; ++k){
            row[k] += delta*input[k];
        }
    }
}
/*
 * Replace each input[k] in place with the value propagated back through
 * the layer: the sum over output units of layer.output[o]*weight(o,k),
 * scaled by layer.gradient evaluated on the value input[k] held on entry.
 */
void backpropagate_connected_layer(double *input, connected_layer layer)
{
    int o, k;
    for(k = 0; k < layer.inputs; ++k){
        /* take the slope before the slot is overwritten */
        const double slope = layer.gradient(input[k]);
        double back = 0;
        for(o = 0; o < layer.outputs; ++o){
            back += layer.output[o]*layer.weights[o*layer.inputs + k];
        }
        input[k] = back*slope;
    }
}

View File

@ -1,6 +1,8 @@
#ifndef CONNECTED_LAYER_H #ifndef CONNECTED_LAYER_H
#define CONNECTED_LAYER_H #define CONNECTED_LAYER_H
#include "activations.h"
typedef struct{ typedef struct{
int inputs; int inputs;
int outputs; int outputs;
@ -9,13 +11,19 @@ typedef struct{
double *weight_updates; double *weight_updates;
double *bias_updates; double *bias_updates;
double *output; double *output;
double (* activation)();
double (* gradient)();
} connected_layer; } connected_layer;
connected_layer make_connected_layer(int inputs, int outputs); connected_layer make_connected_layer(int inputs, int outputs, ACTIVATOR_TYPE activator);
void run_connected_layer(double *input, connected_layer layer); void run_connected_layer(double *input, connected_layer layer);
void backpropagate_connected_layer(double *input, connected_layer layer); void learn_connected_layer(double *input, connected_layer layer);
void calculate_updates_connected_layer(double *input, connected_layer layer);
void update_connected_layer(connected_layer layer, double step); void update_connected_layer(connected_layer layer, double step);
void backpropagate_connected_layer(double *input, connected_layer layer);
void calculate_update_connected_layer(double *input, connected_layer layer);
#endif #endif

View File

@ -33,12 +33,12 @@ void run_convolutional_layer(const image input, const convolutional_layer layer)
for(i = 0; i < layer.n; ++i){ for(i = 0; i < layer.n; ++i){
convolve(input, layer.kernels[i], layer.stride, i, layer.output); convolve(input, layer.kernels[i], layer.stride, i, layer.output);
} }
for(i = 0; i < input.h*input.w*input.c; ++i){ for(i = 0; i < layer.output.h*layer.output.w*layer.output.c; ++i){
input.data[i] = convolution_activation(input.data[i]); layer.output.data[i] = convolution_activation(layer.output.data[i]);
} }
} }
void backpropagate_layer(image input, convolutional_layer layer) void backpropagate_convolutional_layer(image input, convolutional_layer layer)
{ {
int i; int i;
zero_image(input); zero_image(input);
@ -47,7 +47,7 @@ void backpropagate_layer(image input, convolutional_layer layer)
} }
} }
void backpropagate_layer_convolve(image input, convolutional_layer layer) void backpropagate_convolutional_layer_convolve(image input, convolutional_layer layer)
{ {
int i,j; int i,j;
for(i = 0; i < layer.n; ++i){ for(i = 0; i < layer.n; ++i){
@ -67,20 +67,29 @@ void backpropagate_layer_convolve(image input, convolutional_layer layer)
} }
} }
void error_convolutional_layer(image input, convolutional_layer layer) void learn_convolutional_layer(image input, convolutional_layer layer)
{ {
int i; int i;
for(i = 0; i < layer.n; ++i){ for(i = 0; i < layer.n; ++i){
kernel_update(input, layer.kernel_updates[i], layer.stride, i, layer.output); kernel_update(input, layer.kernel_updates[i], layer.stride, i, layer.output);
} }
image old_input = copy_image(input); image old_input = copy_image(input);
zero_image(input); backpropagate_convolutional_layer(input, layer);
for(i = 0; i < layer.n; ++i){
back_convolve(input, layer.kernels[i], layer.stride, i, layer.output);
}
for(i = 0; i < input.h*input.w*input.c; ++i){ for(i = 0; i < input.h*input.w*input.c; ++i){
input.data[i] = input.data[i]*convolution_gradient(input.data[i]); input.data[i] *= convolution_gradient(old_input.data[i]);
} }
free_image(old_input); free_image(old_input);
} }
/*
 * Apply the accumulated kernel updates, scaled by step, to every kernel of
 * the layer, then hand each update image to zero_image (presumably to
 * reset the accumulator for the next round).
 */
void update_convolutional_layer(convolutional_layer layer, double step)
{
    int k;
    for(k = 0; k < layer.n; ++k){
        int remaining = layer.kernels[k].h*layer.kernels[k].w*layer.kernels[k].c;
        /* elements are independent, so the walk direction does not matter */
        while(remaining-- > 0){
            layer.kernels[k].data[remaining] += step*layer.kernel_updates[k].data[remaining];
        }
        zero_image(layer.kernel_updates[k]);
    }
}

View File

@ -14,8 +14,7 @@ typedef struct {
convolutional_layer make_convolutional_layer(int w, int h, int c, int n, int size, int stride); convolutional_layer make_convolutional_layer(int w, int h, int c, int n, int size, int stride);
void run_convolutional_layer(const image input, const convolutional_layer layer); void run_convolutional_layer(const image input, const convolutional_layer layer);
void backpropagate_layer(image input, convolutional_layer layer); void learn_convolutional_layer(image input, convolutional_layer layer);
void backpropagate_layer_convolve(image input, convolutional_layer layer);
#endif #endif

View File

@ -132,7 +132,7 @@ image make_random_image(int h, int w, int c)
image out = make_image(h,w,c); image out = make_image(h,w,c);
int i; int i;
for(i = 0; i < h*w*c; ++i){ for(i = 0; i < h*w*c; ++i){
out.data[i] = (double)rand()/RAND_MAX; out.data[i] = .5-(double)rand()/RAND_MAX;
} }
return out; return out;
} }

View File

@ -8,7 +8,7 @@
void run_network(image input, network net) void run_network(image input, network net)
{ {
int i; int i;
double *input_d = 0; double *input_d = input.data;
for(i = 0; i < net.n; ++i){ for(i = 0; i < net.n; ++i){
if(net.types[i] == CONVOLUTIONAL){ if(net.types[i] == CONVOLUTIONAL){
convolutional_layer layer = *(convolutional_layer *)net.layers[i]; convolutional_layer layer = *(convolutional_layer *)net.layers[i];
@ -30,6 +30,77 @@ void run_network(image input, network net)
} }
} }
/*
 * Apply the pending updates of every layer in the network with the given
 * step size. Only convolutional and connected layers are updated; nothing
 * is done for max-pool layers.
 */
void update_network(network net, double step)
{
    int idx;
    for(idx = 0; idx < net.n; ++idx){
        switch(net.types[idx]){
        case CONVOLUTIONAL:
            update_convolutional_layer(*(convolutional_layer *)net.layers[idx], step);
            break;
        case CONNECTED:
            update_connected_layer(*(connected_layer *)net.layers[idx], step);
            break;
        case MAXPOOL:
        default:
            /* no update step for this layer type */
            break;
        }
    }
}
/*
 * Run the learning pass over the network from the last layer to the first.
 *
 * For layer i the "previous" activations are the network input (i == 0) or
 * the output of layer i-1. Convolutional layers receive them as an image
 * (prev), connected layers as a raw double array (prev_p).
 *
 * prev and prev_p start out zeroed so that no indeterminate value is ever
 * passed on: when layer i-1 is CONNECTED only prev_p is refreshed, and
 * prev keeps whatever it held before (all zeroes if nothing was assigned).
 * NOTE(review): a CONVOLUTIONAL layer directly after a CONNECTED one has
 * no meaningful image to learn from here - confirm that layout is unused.
 * Max-pool layers are skipped entirely in this pass.
 */
void learn_network(image input, network net)
{
    int i;
    image prev = {0};
    double *prev_p = 0;
    for(i = net.n-1; i >= 0; --i){
        /* locate the activations that fed layer i */
        if(i == 0){
            prev = input;
            prev_p = prev.data;
        } else if(net.types[i-1] == CONVOLUTIONAL){
            convolutional_layer layer = *(convolutional_layer *)net.layers[i-1];
            prev = layer.output;
            prev_p = prev.data;
        } else if(net.types[i-1] == MAXPOOL){
            maxpool_layer layer = *(maxpool_layer *)net.layers[i-1];
            prev = layer.output;
            prev_p = prev.data;
        } else if(net.types[i-1] == CONNECTED){
            connected_layer layer = *(connected_layer *)net.layers[i-1];
            prev_p = layer.output;
        }

        /* let layer i learn from them */
        if(net.types[i] == CONVOLUTIONAL){
            convolutional_layer layer = *(convolutional_layer *)net.layers[i];
            learn_convolutional_layer(prev, layer);
        }
        else if(net.types[i] == MAXPOOL){
            /* nothing is done for max-pool layers */
        }
        else if(net.types[i] == CONNECTED){
            connected_layer layer = *(connected_layer *)net.layers[i];
            learn_connected_layer(prev_p, layer);
        }
    }
}
/*
 * Return a pointer to the output buffer of the network's last layer.
 *
 * The buffer belongs to the layer; the caller must not free it. Returns 0
 * when the network has no layers or the last layer's type is not one of
 * CONVOLUTIONAL, MAXPOOL or CONNECTED.
 */
double *get_network_output(network net)
{
    int i = net.n-1;
    if(i < 0){
        /* empty network: there is no last layer to index */
        return 0;
    }
    if(net.types[i] == CONVOLUTIONAL){
        return ((convolutional_layer *)net.layers[i])->output.data;
    }
    else if(net.types[i] == MAXPOOL){
        return ((maxpool_layer *)net.layers[i])->output.data;
    }
    else if(net.types[i] == CONNECTED){
        return ((connected_layer *)net.layers[i])->output;
    }
    return 0;
}
image get_network_image(network net) image get_network_image(network net)
{ {
int i; int i;

View File

@ -17,6 +17,9 @@ typedef struct {
} network; } network;
void run_network(image input, network net); void run_network(image input, network net);
double *get_network_output(network net);
void learn_network(image input, network net);
void update_network(network net, double step);
image get_network_image(network net); image get_network_image(network net);
#endif #endif

View File

@ -34,11 +34,11 @@ void test_color()
void test_convolutional_layer() void test_convolutional_layer()
{ {
srand(0); srand(0);
image dog = load_image("test_dog.jpg"); image dog = load_image("dog.jpg");
int i; int i;
int n = 5; int n = 3;
int stride = 1; int stride = 1;
int size = 8; int size = 3;
convolutional_layer layer = make_convolutional_layer(dog.h, dog.w, dog.c, n, size, stride); convolutional_layer layer = make_convolutional_layer(dog.h, dog.w, dog.c, n, size, stride);
char buff[256]; char buff[256];
for(i = 0; i < n; ++i) { for(i = 0; i < n; ++i) {
@ -47,7 +47,7 @@ void test_convolutional_layer()
} }
run_convolutional_layer(dog, layer); run_convolutional_layer(dog, layer);
maxpool_layer mlayer = make_maxpool_layer(layer.output.h, layer.output.w, layer.output.c, 3); maxpool_layer mlayer = make_maxpool_layer(layer.output.h, layer.output.w, layer.output.c, 2);
run_maxpool_layer(layer.output,mlayer); run_maxpool_layer(layer.output,mlayer);
show_image_layers(mlayer.output, "Test Maxpool Layer"); show_image_layers(mlayer.output, "Test Maxpool Layer");
@ -128,9 +128,9 @@ void test_network()
n = 128; n = 128;
convolutional_layer cl5 = make_convolutional_layer(cl4.output.h, cl4.output.w, cl4.output.c, n, size, stride); convolutional_layer cl5 = make_convolutional_layer(cl4.output.h, cl4.output.w, cl4.output.c, n, size, stride);
maxpool_layer ml3 = make_maxpool_layer(cl5.output.h, cl5.output.w, cl5.output.c, 4); maxpool_layer ml3 = make_maxpool_layer(cl5.output.h, cl5.output.w, cl5.output.c, 4);
connected_layer nl = make_connected_layer(ml3.output.h*ml3.output.w*ml3.output.c, 4096); connected_layer nl = make_connected_layer(ml3.output.h*ml3.output.w*ml3.output.c, 4096, RELU);
connected_layer nl2 = make_connected_layer(4096, 4096); connected_layer nl2 = make_connected_layer(4096, 4096, RELU);
connected_layer nl3 = make_connected_layer(4096, 1000); connected_layer nl3 = make_connected_layer(4096, 1000, RELU);
net.layers[0] = &cl; net.layers[0] = &cl;
net.layers[1] = &ml; net.layers[1] = &ml;
@ -155,6 +155,7 @@ void test_network()
show_image_layers(get_network_image(net), "Test Network Layer"); show_image_layers(get_network_image(net), "Test Network Layer");
} }
void test_backpropagate() void test_backpropagate()
{ {
int n = 3; int n = 3;
@ -169,13 +170,13 @@ void test_backpropagate()
int i; int i;
clock_t start = clock(), end; clock_t start = clock(), end;
for(i = 0; i < 100; ++i){ for(i = 0; i < 100; ++i){
backpropagate_layer(dog_copy, cl); backpropagate_convolutional_layer(dog_copy, cl);
} }
end = clock(); end = clock();
printf("Backpropagate: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC); printf("Backpropagate: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
start = clock(); start = clock();
for(i = 0; i < 100; ++i){ for(i = 0; i < 100; ++i){
backpropagate_layer_convolve(dog, cl); backpropagate_convolutional_layer_convolve(dog, cl);
} }
end = clock(); end = clock();
printf("Backpropagate Using Convolutions: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC); printf("Backpropagate Using Convolutions: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
@ -185,14 +186,54 @@ void test_backpropagate()
show_image(dog, "Test Backpropagate Difference"); show_image(dog, "Test Backpropagate Difference");
} }
/*
 * Train a 1-20-20-1 fully connected ReLU network to approximate v*v for
 * v drawn uniformly from [0,1], printing a running error report every
 * 100000 samples. The loop never terminates; stop the process to end it.
 */
void test_ann()
{
    network net;
    net.n = 3;
    net.layers = calloc(net.n, sizeof(void *));
    net.types = calloc(net.n, sizeof(LAYER_TYPE));
    if(!net.layers || !net.types){
        fprintf(stderr, "test_ann: out of memory\n");
        free(net.layers);
        free(net.types);
        return;
    }
    net.types[0] = CONNECTED;
    net.types[1] = CONNECTED;
    net.types[2] = CONNECTED;

    connected_layer nl = make_connected_layer(1, 20, RELU);
    connected_layer nl2 = make_connected_layer(20, 20, RELU);
    connected_layer nl3 = make_connected_layer(20, 1, RELU);

    net.layers[0] = &nl;
    net.layers[1] = &nl2;
    net.layers[2] = &nl3;

    image t = make_image(1,1,1);
    int count = 0;
    double avgerr = 0;
    while(1){
        double v = ((double)rand()/RAND_MAX);
        double truth = v*v;
        set_pixel(t,0,0,0,v);
        run_network(t, net);
        double *out = get_network_output(net);
        double err = pow((out[0]-truth),2.);
        /* exponential moving average of the squared error */
        avgerr = .99 * avgerr + .01 * err;
        /* Reset the counter at each report so it never grows without
         * bound; the loop is endless and a signed int would overflow. */
        if(++count == 100000){
            count = 0;
            printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
        }
        /* the learning pass reads the error signal from the output slot */
        out[0] = truth - out[0];
        learn_network(t, net);
        update_network(net, .001);
    }
}
int main() int main()
{ {
//test_backpropagate(); //test_backpropagate();
test_ann();
//test_convolve(); //test_convolve();
//test_upsample(); //test_upsample();
//test_rotate(); //test_rotate();
//test_load(); //test_load();
test_network(); //test_network();
//test_convolutional_layer(); //test_convolutional_layer();
//test_color(); //test_color();
cvWaitKey(0); cvWaitKey(0);