mirror of https://github.com/pjreddie/darknet.git · synced 2023-08-10 21:13:14 +03:00

commit 9b1774bd39 (parent 41bcfac86f)

    Connected layers work forward and backward!
.gitignore (vendored): 1 addition

@@ -3,6 +3,7 @@
 *.csv
 images/
 opencv/
 convnet/
 cnn
 
 # OS Generated #
Makefile: 4 changes

@@ -1,10 +1,10 @@
 CC=gcc
 CFLAGS=-Wall `pkg-config --cflags opencv` -O3 -flto -ffast-math
-#CFLAGS=-Wall `pkg-config --cflags opencv` -O0 -g
+CFLAGS=-Wall `pkg-config --cflags opencv` -O0 -g
 LDFLAGS=`pkg-config --libs opencv` -lm
 VPATH=./src/
 
-OBJ=network.o image.o tests.o convolutional_layer.o connected_layer.o maxpool_layer.o
+OBJ=network.o image.o tests.o convolutional_layer.o connected_layer.o maxpool_layer.o activations.o
 
 all: cnn
 
src/activations.c (new file): 32 additions

@@ -0,0 +1,32 @@
+#include "activations.h"
+
+#include <math.h>
+
+double identity_activation(double x)
+{
+    return x;
+}
+double identity_gradient(double x)
+{
+    return 1;
+}
+
+double relu_activation(double x)
+{
+    return x*(x>0);
+}
+double relu_gradient(double x)
+{
+    return (x>=0);
+}
+
+double sigmoid_activation(double x)
+{
+    return 1./(1.+exp(-x));
+}
+
+double sigmoid_gradient(double x)
+{
+    return x*(1.-x);
+}
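Note on the new activations: sigmoid_gradient(x) = x*(1.-x) is the derivative expressed in terms of the sigmoid's output, s'(z) = s(z)(1 - s(z)), so it must be fed the post-activation value, not the pre-activation. A minimal standalone check of that identity (a sketch, not part of the commit):

    #include <stdio.h>
    #include <math.h>

    double sigmoid_activation(double x){ return 1./(1.+exp(-x)); }
    double sigmoid_gradient(double x){ return x*(1.-x); }

    int main()
    {
        double z = 0.7;                    /* pre-activation */
        double y = sigmoid_activation(z);  /* post-activation */
        /* the gradient helper is fed the output y, not the input z */
        double analytic = sigmoid_gradient(y);
        /* central-difference estimate of ds/dz at z, for comparison */
        double numeric = (sigmoid_activation(z+1e-6) - sigmoid_activation(z-1e-6))/2e-6;
        printf("analytic %f, numeric %f\n", analytic, numeric);
        return 0;
    }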
src/activations.h (new file): 10 additions

@@ -0,0 +1,10 @@
+typedef enum{
+    SIGMOID, RELU, IDENTITY
+}ACTIVATOR_TYPE;
+
+double relu_activation(double x);
+double relu_gradient(double x);
+double sigmoid_activation(double x);
+double sigmoid_gradient(double x);
+double identity_activation(double x);
+double identity_gradient(double x);
src/connected_layer.c

@@ -1,19 +1,10 @@
 #include "connected_layer.h"
 
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 
-double activation(double x)
-{
-    return x*(x>0);
-}
-
-double gradient(double x)
-{
-    return (x>=0);
-}
-
-connected_layer make_connected_layer(int inputs, int outputs)
+connected_layer make_connected_layer(int inputs, int outputs, ACTIVATOR_TYPE activator)
 {
     int i;
     connected_layer layer;
@@ -32,6 +23,17 @@ connected_layer make_connected_layer(int inputs, int outputs)
     for(i = 0; i < outputs; ++i)
         layer.biases[i] = (double)rand()/RAND_MAX;
 
+    if(activator == SIGMOID){
+        layer.activation = sigmoid_activation;
+        layer.gradient = sigmoid_gradient;
+    }else if(activator == RELU){
+        layer.activation = relu_activation;
+        layer.gradient = relu_gradient;
+    }else if(activator == IDENTITY){
+        layer.activation = identity_activation;
+        layer.gradient = identity_gradient;
+    }
+
     return layer;
 }
 
@@ -41,39 +43,16 @@ void run_connected_layer(double *input, connected_layer layer)
     for(i = 0; i < layer.outputs; ++i){
         layer.output[i] = layer.biases[i];
         for(j = 0; j < layer.inputs; ++j){
-            layer.output[i] += input[j]*layer.weights[i*layer.outputs + j];
+            layer.output[i] += input[j]*layer.weights[i*layer.inputs + j];
         }
-        layer.output[i] = activation(layer.output[i]);
+        layer.output[i] = layer.activation(layer.output[i]);
     }
 }
 
-void backpropagate_connected_layer(double *input, connected_layer layer)
+void learn_connected_layer(double *input, connected_layer layer)
 {
     int i, j;
-    double *old_input = calloc(layer.inputs, sizeof(double));
-    memcpy(old_input, input, layer.inputs*sizeof(double));
-    memset(input, 0, layer.inputs*sizeof(double));
-
-    for(i = 0; i < layer.outputs; ++i){
-        for(j = 0; j < layer.inputs; ++j){
-            input[j] += layer.output[i]*layer.weights[i*layer.outputs + j];
-        }
-    }
-    for(j = 0; j < layer.inputs; ++j){
-        input[j] = input[j]*gradient(old_input[j]);
-    }
-    free(old_input);
-}
-
-void calculate_updates_connected_layer(double *input, connected_layer layer)
-{
-    int i, j;
-    for(i = 0; i < layer.outputs; ++i){
-        layer.bias_updates[i] += layer.output[i];
-        for(j = 0; j < layer.inputs; ++j){
-            layer.weight_updates[i*layer.outputs + j] += layer.output[i]*input[j];
-        }
-    }
+    calculate_update_connected_layer(input, layer);
+    backpropagate_connected_layer(input, layer);
 }
 
 void update_connected_layer(connected_layer layer, double step)
@@ -82,11 +61,36 @@ void update_connected_layer(connected_layer layer, double step)
     for(i = 0; i < layer.outputs; ++i){
         layer.biases[i] += step*layer.bias_updates[i];
         for(j = 0; j < layer.inputs; ++j){
-            int index = i*layer.outputs+j;
-            layer.weights[index] = layer.weight_updates[index];
+            int index = i*layer.inputs+j;
+            layer.weights[index] += step*layer.weight_updates[index];
         }
     }
     memset(layer.bias_updates, 0, layer.outputs*sizeof(double));
     memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(double));
 }
+
+void calculate_update_connected_layer(double *input, connected_layer layer)
+{
+    int i, j;
+    for(i = 0; i < layer.outputs; ++i){
+        layer.bias_updates[i] += layer.output[i];
+        for(j = 0; j < layer.inputs; ++j){
+            layer.weight_updates[i*layer.inputs + j] += layer.output[i]*input[j];
+        }
+    }
+}
+
+void backpropagate_connected_layer(double *input, connected_layer layer)
+{
+    int i, j;
+
+    for(j = 0; j < layer.inputs; ++j){
+        double grad = layer.gradient(input[j]);
+        input[j] = 0;
+        for(i = 0; i < layer.outputs; ++i){
+            input[j] += layer.output[i]*layer.weights[i*layer.inputs + j];
+        }
+        input[j] *= grad;
+    }
+}
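The recurring fix in this file is the weight index: weights is an outputs-by-inputs matrix stored row-major, so entry (i, j) lives at i*layer.inputs + j. The old i*layer.outputs + j only agrees when inputs == outputs, which is why square layers appeared to work. A small layout sketch (standalone, hypothetical values):

    #include <stdio.h>

    int main()
    {
        int inputs = 3, outputs = 2;
        /* row-major: one row of `inputs` weights per output neuron */
        double weights[] = {
            1, 2, 3,   /* weights into output 0 */
            4, 5, 6    /* weights into output 1 */
        };
        int i, j;
        for(i = 0; i < outputs; ++i){
            for(j = 0; j < inputs; ++j){
                /* i*inputs + j addresses row i correctly; i*outputs + j
                   would read the wrong element whenever i > 0 */
                printf("w(%d,%d) = %g\n", i, j, weights[i*inputs + j]);
            }
        }
        return 0;
    }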
src/connected_layer.h

@@ -1,6 +1,8 @@
 #ifndef CONNECTED_LAYER_H
 #define CONNECTED_LAYER_H
 
+#include "activations.h"
+
 typedef struct{
     int inputs;
     int outputs;
@@ -9,13 +11,19 @@ typedef struct{
     double *weight_updates;
     double *bias_updates;
     double *output;
+
+    double (* activation)();
+    double (* gradient)();
 } connected_layer;
 
-connected_layer make_connected_layer(int inputs, int outputs);
+connected_layer make_connected_layer(int inputs, int outputs, ACTIVATOR_TYPE activator);
 
 void run_connected_layer(double *input, connected_layer layer);
-void backpropagate_connected_layer(double *input, connected_layer layer);
-void calculate_updates_connected_layer(double *input, connected_layer layer);
+void learn_connected_layer(double *input, connected_layer layer);
 void update_connected_layer(connected_layer layer, double step);
 
+void backpropagate_connected_layer(double *input, connected_layer layer);
+void calculate_update_connected_layer(double *input, connected_layer layer);
+
 #endif
src/convolutional_layer.c

@@ -33,12 +33,12 @@ void run_convolutional_layer(const image input, const convolutional_layer layer)
     for(i = 0; i < layer.n; ++i){
         convolve(input, layer.kernels[i], layer.stride, i, layer.output);
     }
-    for(i = 0; i < input.h*input.w*input.c; ++i){
-        input.data[i] = convolution_activation(input.data[i]);
+    for(i = 0; i < layer.output.h*layer.output.w*layer.output.c; ++i){
+        layer.output.data[i] = convolution_activation(layer.output.data[i]);
     }
 }
 
-void backpropagate_layer(image input, convolutional_layer layer)
+void backpropagate_convolutional_layer(image input, convolutional_layer layer)
 {
     int i;
     zero_image(input);
@@ -47,7 +47,7 @@ void backpropagate_layer(image input, convolutional_layer layer)
     }
 }
 
-void backpropagate_layer_convolve(image input, convolutional_layer layer)
+void backpropagate_convolutional_layer_convolve(image input, convolutional_layer layer)
 {
     int i,j;
     for(i = 0; i < layer.n; ++i){
@@ -67,20 +67,29 @@ void backpropagate_layer_convolve(image input, convolutional_layer layer)
     }
 }
 
-void error_convolutional_layer(image input, convolutional_layer layer)
+void learn_convolutional_layer(image input, convolutional_layer layer)
 {
     int i;
     for(i = 0; i < layer.n; ++i){
         kernel_update(input, layer.kernel_updates[i], layer.stride, i, layer.output);
     }
-    zero_image(input);
-    for(i = 0; i < layer.n; ++i){
-        back_convolve(input, layer.kernels[i], layer.stride, i, layer.output);
-    }
+    image old_input = copy_image(input);
+    backpropagate_convolutional_layer(input, layer);
     for(i = 0; i < input.h*input.w*input.c; ++i){
-        input.data[i] = input.data[i]*convolution_gradient(input.data[i]);
+        input.data[i] *= convolution_gradient(old_input.data[i]);
     }
+    free_image(old_input);
 }
+
+void update_convolutional_layer(convolutional_layer layer, double step)
+{
+    int i,j;
+    for(i = 0; i < layer.n; ++i){
+        int pixels = layer.kernels[i].h*layer.kernels[i].w*layer.kernels[i].c;
+        for(j = 0; j < pixels; ++j){
+            layer.kernels[i].data[j] += step*layer.kernel_updates[i].data[j];
+        }
+        zero_image(layer.kernel_updates[i]);
+    }
+}
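Same idea as learn_connected_layer: backpropagate_convolutional_layer overwrites input in place with the backpropagated error, while convolution_gradient still needs the forward-pass activations, so learn_convolutional_layer snapshots them first with copy_image and frees the snapshot afterwards. A minimal sketch of that save-then-clobber idiom on a plain buffer (hypothetical helper, not from the commit):

    #include <stdlib.h>
    #include <string.h>

    /* stand-in for the real in-place backward pass */
    static void backward_in_place(double *buf, int n)
    {
        memset(buf, 0, n*sizeof(double));
    }

    void learn_sketch(double *input, int n)
    {
        int i;
        double *old_input = malloc(n*sizeof(double));
        memcpy(old_input, input, n*sizeof(double));  /* save forward values */
        backward_in_place(input, n);                 /* input now holds error terms */
        for(i = 0; i < n; ++i){
            input[i] *= (old_input[i] >= 0);         /* gradient from the saved copy */
        }
        free(old_input);
    }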
src/convolutional_layer.h

@@ -14,8 +14,7 @@ typedef struct {
 
 convolutional_layer make_convolutional_layer(int w, int h, int c, int n, int size, int stride);
 void run_convolutional_layer(const image input, const convolutional_layer layer);
-void backpropagate_layer(image input, convolutional_layer layer);
-void backpropagate_layer_convolve(image input, convolutional_layer layer);
+void learn_convolutional_layer(image input, convolutional_layer layer);
 
 #endif
src/image.c

@@ -132,7 +132,7 @@ image make_random_image(int h, int w, int c)
     image out = make_image(h,w,c);
     int i;
     for(i = 0; i < h*w*c; ++i){
-        out.data[i] = (double)rand()/RAND_MAX;
+        out.data[i] = .5-(double)rand()/RAND_MAX;
     }
     return out;
 }
src/network.c

@@ -8,7 +8,7 @@
 void run_network(image input, network net)
 {
     int i;
-    double *input_d = 0;
+    double *input_d = input.data;
     for(i = 0; i < net.n; ++i){
         if(net.types[i] == CONVOLUTIONAL){
             convolutional_layer layer = *(convolutional_layer *)net.layers[i];
@@ -30,6 +30,77 @@ void run_network(image input, network net)
     }
 }
 
+void update_network(network net, double step)
+{
+    int i;
+    for(i = 0; i < net.n; ++i){
+        if(net.types[i] == CONVOLUTIONAL){
+            convolutional_layer layer = *(convolutional_layer *)net.layers[i];
+            update_convolutional_layer(layer, step);
+        }
+        else if(net.types[i] == MAXPOOL){
+            //maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+        }
+        else if(net.types[i] == CONNECTED){
+            connected_layer layer = *(connected_layer *)net.layers[i];
+            update_connected_layer(layer, step);
+        }
+    }
+}
+
+void learn_network(image input, network net)
+{
+    int i;
+    image prev;
+    double *prev_p;
+    for(i = net.n-1; i >= 0; --i){
+        if(i == 0){
+            prev = input;
+            prev_p = prev.data;
+        } else if(net.types[i-1] == CONVOLUTIONAL){
+            convolutional_layer layer = *(convolutional_layer *)net.layers[i-1];
+            prev = layer.output;
+            prev_p = prev.data;
+        } else if(net.types[i-1] == MAXPOOL){
+            maxpool_layer layer = *(maxpool_layer *)net.layers[i-1];
+            prev = layer.output;
+            prev_p = prev.data;
+        } else if(net.types[i-1] == CONNECTED){
+            connected_layer layer = *(connected_layer *)net.layers[i-1];
+            prev_p = layer.output;
+        }
+
+        if(net.types[i] == CONVOLUTIONAL){
+            convolutional_layer layer = *(convolutional_layer *)net.layers[i];
+            learn_convolutional_layer(prev, layer);
+        }
+        else if(net.types[i] == MAXPOOL){
+            //maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+        }
+        else if(net.types[i] == CONNECTED){
+            connected_layer layer = *(connected_layer *)net.layers[i];
+            learn_connected_layer(prev_p, layer);
+        }
+    }
+}
+
+double *get_network_output(network net)
+{
+    int i = net.n-1;
+    if(net.types[i] == CONVOLUTIONAL){
+        convolutional_layer layer = *(convolutional_layer *)net.layers[i];
+        return layer.output.data;
+    }
+    else if(net.types[i] == MAXPOOL){
+        maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+        return layer.output.data;
+    }
+    else if(net.types[i] == CONNECTED){
+        connected_layer layer = *(connected_layer *)net.layers[i];
+        return layer.output;
+    }
+    return 0;
+}
 image get_network_image(network net)
 {
     int i;
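The contract these new functions share: run_network fills every layer's output buffer with activations, the caller then writes the error signal into the final output, and learn_network walks the layers last-to-first so each learn_* call can accumulate weight updates and push the error into the buffer below it. The training step from test_ann later in this commit shows the intended call order (t is a 1x1 input image, truth the target value):

    run_network(t, net);                    /* forward pass fills output buffers */
    double *out = get_network_output(net);
    out[0] = truth - out[0];                /* seed the error at the network output */
    learn_network(t, net);                  /* backward pass, accumulates updates */
    update_network(net, .001);              /* apply and reset accumulated updates */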
src/network.h

@@ -17,6 +17,9 @@ typedef struct {
 } network;
 
 void run_network(image input, network net);
+double *get_network_output(network net);
+void learn_network(image input, network net);
+void update_network(network net, double step);
 image get_network_image(network net);
 
 #endif
src/tests.c: 61 changes

@@ -34,11 +34,11 @@ void test_color()
 void test_convolutional_layer()
 {
     srand(0);
-    image dog = load_image("test_dog.jpg");
+    image dog = load_image("dog.jpg");
     int i;
-    int n = 5;
+    int n = 3;
     int stride = 1;
-    int size = 8;
+    int size = 3;
     convolutional_layer layer = make_convolutional_layer(dog.h, dog.w, dog.c, n, size, stride);
     char buff[256];
     for(i = 0; i < n; ++i) {
@@ -47,7 +47,7 @@ void test_convolutional_layer()
     }
     run_convolutional_layer(dog, layer);
 
-    maxpool_layer mlayer = make_maxpool_layer(layer.output.h, layer.output.w, layer.output.c, 3);
+    maxpool_layer mlayer = make_maxpool_layer(layer.output.h, layer.output.w, layer.output.c, 2);
     run_maxpool_layer(layer.output,mlayer);
 
     show_image_layers(mlayer.output, "Test Maxpool Layer");
@@ -128,9 +128,9 @@ void test_network()
     n = 128;
     convolutional_layer cl5 = make_convolutional_layer(cl4.output.h, cl4.output.w, cl4.output.c, n, size, stride);
     maxpool_layer ml3 = make_maxpool_layer(cl5.output.h, cl5.output.w, cl5.output.c, 4);
-    connected_layer nl = make_connected_layer(ml3.output.h*ml3.output.w*ml3.output.c, 4096);
-    connected_layer nl2 = make_connected_layer(4096, 4096);
-    connected_layer nl3 = make_connected_layer(4096, 1000);
+    connected_layer nl = make_connected_layer(ml3.output.h*ml3.output.w*ml3.output.c, 4096, RELU);
+    connected_layer nl2 = make_connected_layer(4096, 4096, RELU);
+    connected_layer nl3 = make_connected_layer(4096, 1000, RELU);
 
     net.layers[0] = &cl;
     net.layers[1] = &ml;
@@ -155,6 +155,7 @@ void test_network()
 
     show_image_layers(get_network_image(net), "Test Network Layer");
 }
+
 void test_backpropagate()
 {
     int n = 3;
@@ -169,13 +170,13 @@ void test_backpropagate()
     int i;
     clock_t start = clock(), end;
     for(i = 0; i < 100; ++i){
-        backpropagate_layer(dog_copy, cl);
+        backpropagate_convolutional_layer(dog_copy, cl);
     }
     end = clock();
     printf("Backpropagate: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
     start = clock();
     for(i = 0; i < 100; ++i){
-        backpropagate_layer_convolve(dog, cl);
+        backpropagate_convolutional_layer_convolve(dog, cl);
     }
     end = clock();
     printf("Backpropagate Using Convolutions: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
@@ -185,14 +186,54 @@ void test_backpropagate()
     show_image(dog, "Test Backpropagate Difference");
 }
 
+void test_ann()
+{
+    network net;
+    net.n = 3;
+    net.layers = calloc(net.n, sizeof(void *));
+    net.types = calloc(net.n, sizeof(LAYER_TYPE));
+    net.types[0] = CONNECTED;
+    net.types[1] = CONNECTED;
+    net.types[2] = CONNECTED;
+
+    connected_layer nl = make_connected_layer(1, 20, RELU);
+    connected_layer nl2 = make_connected_layer(20, 20, RELU);
+    connected_layer nl3 = make_connected_layer(20, 1, RELU);
+
+    net.layers[0] = &nl;
+    net.layers[1] = &nl2;
+    net.layers[2] = &nl3;
+
+    image t = make_image(1,1,1);
+    int count = 0;
+
+    double avgerr = 0;
+    while(1){
+        double v = ((double)rand()/RAND_MAX);
+        double truth = v*v;
+        set_pixel(t,0,0,0,v);
+        run_network(t, net);
+        double *out = get_network_output(net);
+        double err = pow((out[0]-truth),2.);
+        avgerr = .99 * avgerr + .01 * err;
+        //if(++count % 100000 == 0) printf("%f\n", avgerr);
+        if(++count % 100000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
+        out[0] = truth - out[0];
+        learn_network(t, net);
+        update_network(net, .001);
+    }
+
+}
+
 int main()
 {
     //test_backpropagate();
+    test_ann();
     //test_convolve();
     //test_upsample();
     //test_rotate();
     //test_load();
-    test_network();
+    //test_network();
     //test_convolutional_layer();
     //test_color();
     cvWaitKey(0);
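A quick check that the signs in test_ann descend the error surface: for a linear output unit with squared error E = (truth - out)^2, ignoring the constant factor of 2,

    dE/dw = -(truth - out) * input
    w += step * (truth - out) * input    (equivalently  w -= step * dE/dw)

so seeding out[0] = truth - out[0] and then letting update_connected_layer apply weights[index] += step*weight_updates[index] is ordinary gradient descent on E.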