mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Works well on MNIST
This commit is contained in:
parent
0d6bb5d44d
commit
b715671988
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,6 +1,8 @@
|
||||
*.o
|
||||
*.dSYM
|
||||
*.csv
|
||||
*.out
|
||||
mnist/
|
||||
images/
|
||||
opencv/
|
||||
convnet/
|
||||
|
10
nist.cfg
10
nist.cfg
@ -2,7 +2,7 @@
|
||||
width=28
|
||||
height=28
|
||||
channels=1
|
||||
filters=4
|
||||
filters=6
|
||||
size=5
|
||||
stride=1
|
||||
activation=ramp
|
||||
@ -11,7 +11,7 @@ activation=ramp
|
||||
stride=2
|
||||
|
||||
[conv]
|
||||
filters=12
|
||||
filters=16
|
||||
size=5
|
||||
stride=1
|
||||
activation=ramp
|
||||
@ -20,7 +20,7 @@ activation=ramp
|
||||
stride=2
|
||||
|
||||
[conv]
|
||||
filters=10
|
||||
filters=120
|
||||
size=3
|
||||
stride=1
|
||||
activation=ramp
|
||||
@ -28,6 +28,10 @@ activation=ramp
|
||||
[maxpool]
|
||||
stride=2
|
||||
|
||||
[conn]
|
||||
output = 80
|
||||
activation=ramp
|
||||
|
||||
[conn]
|
||||
output = 10
|
||||
activation=ramp
|
||||
|
@ -8,15 +8,16 @@ ACTIVATION get_activation(char *s)
|
||||
{
|
||||
if (strcmp(s, "sigmoid")==0) return SIGMOID;
|
||||
if (strcmp(s, "relu")==0) return RELU;
|
||||
if (strcmp(s, "identity")==0) return IDENTITY;
|
||||
if (strcmp(s, "linear")==0) return LINEAR;
|
||||
if (strcmp(s, "ramp")==0) return RAMP;
|
||||
if (strcmp(s, "tanh")==0) return TANH;
|
||||
fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
|
||||
return RELU;
|
||||
}
|
||||
|
||||
double activate(double x, ACTIVATION a){
|
||||
switch(a){
|
||||
case IDENTITY:
|
||||
case LINEAR:
|
||||
return x;
|
||||
case SIGMOID:
|
||||
return 1./(1.+exp(-x));
|
||||
@ -24,12 +25,14 @@ double activate(double x, ACTIVATION a){
|
||||
return x*(x>0);
|
||||
case RAMP:
|
||||
return x*(x>0) + .1*x;
|
||||
case TANH:
|
||||
return (exp(2*x)-1)/(exp(2*x)+1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
double gradient(double x, ACTIVATION a){
|
||||
switch(a){
|
||||
case IDENTITY:
|
||||
case LINEAR:
|
||||
return 1;
|
||||
case SIGMOID:
|
||||
return (1.-x)*x;
|
||||
@ -37,35 +40,9 @@ double gradient(double x, ACTIVATION a){
|
||||
return (x>0);
|
||||
case RAMP:
|
||||
return (x>0) + .1;
|
||||
case TANH:
|
||||
return 1-x*x;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Identity activation: the output is the input, untouched.
 *
 * x       value to activate
 * returns x
 */
double identity_activation(double x) {
    return x;
}
|
||||
/*
 * Slope of the identity activation, which is 1 everywhere.
 *
 * x       ignored; accepted only so the signature matches the other
 *         *_gradient functions in this file
 * returns 1.0
 */
double identity_gradient(double x) {
    (void)x; /* deliberately unused */
    return 1.0;
}
|
||||
|
||||
/*
 * Rectified linear unit: positive inputs pass through unchanged, negative
 * inputs are clamped to zero.
 *
 * The result is selected rather than computed as x*(x>0).  Multiplying by
 * the 0/1 flag turns x = -infinity into NaN (infinity * 0) and turns finite
 * negatives into -0.0; selecting yields +0.0 in both cases.  A NaN input is
 * still returned as NaN, because (NaN < 0) is false.
 *
 * x       value to activate
 * returns x when x is not negative, otherwise 0.0
 */
double relu_activation(double x)
{
    return (x < 0) ? 0.0 : x;
}
|
||||
/*
 * Slope of the rectified linear unit.
 *
 * x       value to test against zero
 * returns 1.0 when x is strictly positive, 0.0 otherwise (including x == 0)
 */
double relu_gradient(double x) {
    if (x > 0) {
        return 1.0;
    }
    return 0.0;
}
|
||||
|
||||
double sigmoid_activation(double x)
|
||||
{
|
||||
return 1./(1.+exp(-x));
|
||||
}
|
||||
|
||||
/*
 * Computes x * (1 - x).
 *
 * NOTE(review): this equals the derivative of the sigmoid only when x is
 * the already-activated output rather than the raw input -- confirm
 * against the callers.
 *
 * x       value to evaluate
 * returns x * (1 - x)
 */
double sigmoid_gradient(double x) {
    const double complement = 1. - x;
    return complement * x;
}
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
#define ACTIVATIONS_H
|
||||
|
||||
typedef enum{
|
||||
SIGMOID, RELU, IDENTITY, RAMP
|
||||
SIGMOID, RELU, LINEAR, RAMP, TANH
|
||||
}ACTIVATION;
|
||||
|
||||
ACTIVATION get_activation(char *s);
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activation)
|
||||
{
|
||||
printf("Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
|
||||
fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
|
||||
int i;
|
||||
connected_layer *layer = calloc(1, sizeof(connected_layer));
|
||||
layer->inputs = inputs;
|
||||
@ -29,7 +29,7 @@ connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activa
|
||||
layer->biases = calloc(outputs, sizeof(double));
|
||||
for(i = 0; i < outputs; ++i)
|
||||
//layer->biases[i] = rand_normal()*scale + scale;
|
||||
layer->biases[i] = 1;
|
||||
layer->biases[i] = 0;
|
||||
|
||||
layer->activation = activation;
|
||||
return layer;
|
||||
|
@ -39,7 +39,7 @@ convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int si
|
||||
layer->w = w;
|
||||
layer->c = c;
|
||||
layer->n = n;
|
||||
layer->edge = 0;
|
||||
layer->edge = 1;
|
||||
layer->stride = stride;
|
||||
layer->kernels = calloc(n, sizeof(image));
|
||||
layer->kernel_updates = calloc(n, sizeof(image));
|
||||
@ -47,10 +47,10 @@ convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int si
|
||||
layer->biases = calloc(n, sizeof(double));
|
||||
layer->bias_updates = calloc(n, sizeof(double));
|
||||
layer->bias_momentum = calloc(n, sizeof(double));
|
||||
double scale = 20./(size*size*c);
|
||||
double scale = 2./(size*size);
|
||||
for(i = 0; i < n; ++i){
|
||||
//layer->biases[i] = rand_normal()*scale + scale;
|
||||
layer->biases[i] = 1;
|
||||
layer->biases[i] = 0;
|
||||
layer->kernels[i] = make_random_kernel(size, c, scale);
|
||||
layer->kernel_updates[i] = make_random_kernel(size, c, 0);
|
||||
layer->kernel_momentum[i] = make_random_kernel(size, c, 0);
|
||||
@ -63,7 +63,7 @@ convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int si
|
||||
out_h = (layer->h - layer->size)/layer->stride+1;
|
||||
out_w = (layer->h - layer->size)/layer->stride+1;
|
||||
}
|
||||
printf("Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
|
||||
fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
|
||||
layer->output = calloc(out_h * out_w * n, sizeof(double));
|
||||
layer->delta = calloc(out_h * out_w * n, sizeof(double));
|
||||
layer->upsampled = make_image(h,w,n);
|
||||
@ -124,15 +124,22 @@ void backward_convolutional_layer2(convolutional_layer layer, double *input, dou
|
||||
}
|
||||
}
|
||||
|
||||
void learn_convolutional_layer(convolutional_layer layer, double *input)
|
||||
void gradient_delta_convolutional_layer(convolutional_layer layer)
|
||||
{
|
||||
int i;
|
||||
image in_image = double_to_image(layer.h, layer.w, layer.c, input);
|
||||
image out_delta = get_convolutional_delta(layer);
|
||||
image out_image = get_convolutional_image(layer);
|
||||
for(i = 0; i < out_image.h*out_image.w*out_image.c; ++i){
|
||||
out_delta.data[i] *= gradient(out_image.data[i], layer.activation);
|
||||
}
|
||||
}
|
||||
|
||||
void learn_convolutional_layer(convolutional_layer layer, double *input)
|
||||
{
|
||||
int i;
|
||||
image in_image = double_to_image(layer.h, layer.w, layer.c, input);
|
||||
image out_delta = get_convolutional_delta(layer);
|
||||
gradient_delta_convolutional_layer(layer);
|
||||
for(i = 0; i < layer.n; ++i){
|
||||
kernel_update(in_image, layer.kernel_updates[i], layer.stride, i, out_delta, layer.edge);
|
||||
layer.bias_updates[i] += avg_image_layer(out_delta, i);
|
||||
|
@ -19,7 +19,7 @@ image get_maxpool_delta(maxpool_layer layer)
|
||||
|
||||
maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride)
|
||||
{
|
||||
printf("Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
|
||||
fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
|
||||
maxpool_layer *layer = calloc(1, sizeof(maxpool_layer));
|
||||
layer->h = h;
|
||||
layer->w = w;
|
||||
|
@ -276,10 +276,10 @@ void print_network(network net)
|
||||
}
|
||||
double mean = mean_array(output, n);
|
||||
double vari = variance_array(output, n);
|
||||
printf("Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
|
||||
fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
|
||||
if(n > 100) n = 100;
|
||||
for(j = 0; j < n; ++j) printf("%f, ", output[j]);
|
||||
if(n == 100)printf(".....\n");
|
||||
printf("\n");
|
||||
for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]);
|
||||
if(n == 100)fprintf(stderr,".....\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
softmax_layer *make_softmax_layer(int inputs)
|
||||
{
|
||||
printf("Softmax Layer: %d inputs\n", inputs);
|
||||
fprintf(stderr, "Softmax Layer: %d inputs\n", inputs);
|
||||
softmax_layer *layer = calloc(1, sizeof(softmax_layer));
|
||||
layer->inputs = inputs;
|
||||
layer->output = calloc(inputs, sizeof(double));
|
||||
|
86
src/tests.c
86
src/tests.c
@ -15,7 +15,6 @@
|
||||
void test_convolve()
|
||||
{
|
||||
image dog = load_image("dog.jpg");
|
||||
//show_image_layers(dog, "Dog");
|
||||
printf("dog channels %d\n", dog.c);
|
||||
image kernel = make_random_image(3,3,dog.c);
|
||||
image edge = make_image(dog.h, dog.w, 1);
|
||||
@ -88,7 +87,7 @@ void verify_convolutional_layer()
|
||||
image out_delta = get_convolutional_delta(layer);
|
||||
for(i = 0; i < out.h*out.w*out.c; ++i){
|
||||
out_delta.data[i] = 1;
|
||||
backward_convolutional_layer2(layer, test.data, in_delta.data);
|
||||
backward_convolutional_layer(layer, test.data, in_delta.data);
|
||||
image partial = copy_image(in_delta);
|
||||
jacobian2[i] = partial.data;
|
||||
out_delta.data[i] = 0;
|
||||
@ -156,7 +155,7 @@ void test_parser()
|
||||
int count = 0;
|
||||
|
||||
double avgerr = 0;
|
||||
while(1){
|
||||
while(++count < 100000000){
|
||||
double v = ((double)rand()/RAND_MAX);
|
||||
double truth = v*v;
|
||||
input[0] = v;
|
||||
@ -165,8 +164,7 @@ void test_parser()
|
||||
double *delta = get_network_delta(net);
|
||||
double err = pow((out[0]-truth),2.);
|
||||
avgerr = .99 * avgerr + .01 * err;
|
||||
//if(++count % 100000 == 0) printf("%f\n", avgerr);
|
||||
if(++count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
|
||||
if(count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
|
||||
delta[0] = truth - out[0];
|
||||
learn_network(net, input);
|
||||
update_network(net, .001);
|
||||
@ -197,15 +195,16 @@ void test_full()
|
||||
}
|
||||
}
|
||||
|
||||
double error_network(network net, matrix m, double *truth)
|
||||
double error_network(network net, matrix m, double **truth)
|
||||
{
|
||||
int i;
|
||||
int correct = 0;
|
||||
int k = get_network_output_size(net);
|
||||
for(i = 0; i < m.rows; ++i){
|
||||
forward_network(net, m.vals[i]);
|
||||
double *out = get_network_output(net);
|
||||
double err = truth[i] - out[0];
|
||||
if(fabs(err) < .5) ++correct;
|
||||
int guess = max_index(out, k);
|
||||
if(truth[i][guess]) ++correct;
|
||||
}
|
||||
return (double)correct/m.rows;
|
||||
}
|
||||
@ -224,24 +223,35 @@ double **one_hot(double *a, int n, int k)
|
||||
|
||||
void test_nist()
|
||||
{
|
||||
srand(999999);
|
||||
network net = parse_network_cfg("nist.cfg");
|
||||
matrix m = csv_to_matrix("images/nist_train.csv");
|
||||
matrix ho = hold_out_matrix(&m, 3000);
|
||||
matrix m = csv_to_matrix("mnist/mnist_train.csv");
|
||||
matrix test = csv_to_matrix("mnist/mnist_test.csv");
|
||||
double *truth_1d = pop_column(&m, 0);
|
||||
double **truth = one_hot(truth_1d, m.rows, 10);
|
||||
double *ho_truth_1d = pop_column(&ho, 0);
|
||||
double **ho_truth = one_hot(ho_truth_1d, ho.rows, 10);
|
||||
double *test_truth_1d = pop_column(&test, 0);
|
||||
double **test_truth = one_hot(test_truth_1d, test.rows, 10);
|
||||
int i,j;
|
||||
clock_t start = clock(), end;
|
||||
for(i = 0; i < test.rows; ++i){
|
||||
normalize_array(test.vals[i], 28*28);
|
||||
//scale_array(m.vals[i], 28*28, 1./255.);
|
||||
//translate_array(m.vals[i], 28*28, -.1);
|
||||
}
|
||||
for(i = 0; i < m.rows; ++i){
|
||||
normalize_array(m.vals[i], 28*28);
|
||||
//scale_array(m.vals[i], 28*28, 1./255.);
|
||||
//translate_array(m.vals[i], 28*28, -.1);
|
||||
}
|
||||
int count = 0;
|
||||
double lr = .0001;
|
||||
while(++count <= 3000000){
|
||||
double lr = .0005;
|
||||
while(++count <= 300){
|
||||
//lr *= .99;
|
||||
int index = 0;
|
||||
int correct = 0;
|
||||
for(i = 0; i < 1000; ++i){
|
||||
int number = 1000;
|
||||
for(i = 0; i < number; ++i){
|
||||
index = rand()%m.rows;
|
||||
normalize_array(m.vals[index], 28*28);
|
||||
forward_network(net, m.vals[index]);
|
||||
double *out = get_network_output(net);
|
||||
double *delta = get_network_delta(net);
|
||||
@ -260,19 +270,29 @@ void test_nist()
|
||||
}
|
||||
print_network(net);
|
||||
image input = double_to_image(28,28,1, m.vals[index]);
|
||||
show_image(input, "Input");
|
||||
//show_image(input, "Input");
|
||||
image o = get_network_image(net);
|
||||
show_image_collapsed(o, "Output");
|
||||
//show_image_collapsed(o, "Output");
|
||||
visualize_network(net);
|
||||
cvWaitKey(100);
|
||||
cvWaitKey(10);
|
||||
//double test_acc = error_network(net, m, truth);
|
||||
//double valid_acc = error_network(net, ho, ho_truth);
|
||||
//printf("%f, %f\n", test_acc, valid_acc);
|
||||
fprintf(stderr, "%5d: %f %f\n",count, (double)correct/1000, lr);
|
||||
//if(valid_acc > .70) break;
|
||||
fprintf(stderr, "\n%5d: %f %f\n\n",count, (double)correct/number, lr);
|
||||
if(count % 10 == 0 && 0){
|
||||
double train_acc = error_network(net, m, truth);
|
||||
fprintf(stderr, "\nTRAIN: %f\n", train_acc);
|
||||
double test_acc = error_network(net, test, test_truth);
|
||||
fprintf(stderr, "TEST: %f\n\n", test_acc);
|
||||
printf("%d, %f, %f\n", count, train_acc, test_acc);
|
||||
}
|
||||
if(count % (m.rows/number) == 0) lr /= 2;
|
||||
}
|
||||
double train_acc = error_network(net, m, truth);
|
||||
fprintf(stderr, "\nTRAIN: %f\n", train_acc);
|
||||
double test_acc = error_network(net, test, test_truth);
|
||||
fprintf(stderr, "TEST: %f\n\n", test_acc);
|
||||
printf("%d, %f, %f\n", count, train_acc, test_acc);
|
||||
end = clock();
|
||||
printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
|
||||
//printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
void test_kernel_update()
|
||||
@ -281,14 +301,14 @@ void test_kernel_update()
|
||||
double delta[] = {.1};
|
||||
double input[] = {.3, .5, .3, .5, .5, .5, .5, .0, .5};
|
||||
double kernel[] = {1,2,3,4,5,6,7,8,9};
|
||||
convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, IDENTITY);
|
||||
convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, LINEAR);
|
||||
layer.kernels[0].data = kernel;
|
||||
layer.delta = delta;
|
||||
learn_convolutional_layer(layer, input);
|
||||
print_image(layer.kernels[0]);
|
||||
print_image(get_convolutional_delta(layer));
|
||||
print_image(layer.kernel_updates[0]);
|
||||
|
||||
|
||||
}
|
||||
|
||||
void test_random_classify()
|
||||
@ -311,15 +331,15 @@ void test_random_classify()
|
||||
double *delta = get_network_delta(net);
|
||||
//printf("%f\n", out[0]);
|
||||
delta[0] = truth[index] - out[0];
|
||||
// printf("%f\n", delta[0]);
|
||||
// printf("%f\n", delta[0]);
|
||||
//printf("%f %f\n", truth[index], out[0]);
|
||||
learn_network(net, m.vals[index]);
|
||||
update_network(net, .00001);
|
||||
}
|
||||
double test_acc = error_network(net, m, truth);
|
||||
double valid_acc = error_network(net, ho, ho_truth);
|
||||
printf("%f, %f\n", test_acc, valid_acc);
|
||||
fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
|
||||
//double test_acc = error_network(net, m, truth);
|
||||
//double valid_acc = error_network(net, ho, ho_truth);
|
||||
//printf("%f, %f\n", test_acc, valid_acc);
|
||||
//fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
|
||||
//if(valid_acc > .70) break;
|
||||
}
|
||||
end = clock();
|
||||
@ -362,8 +382,8 @@ void test_random_preprocess()
|
||||
int main()
|
||||
{
|
||||
//test_kernel_update();
|
||||
//test_nist();
|
||||
test_full();
|
||||
test_nist();
|
||||
//test_full();
|
||||
//test_random_preprocess();
|
||||
//test_random_classify();
|
||||
//test_parser();
|
||||
|
29
src/utils.c
29
src/utils.c
@ -180,6 +180,35 @@ void normalize_array(double *a, int n)
|
||||
sigma = sqrt(variance_array(a,n));
|
||||
}
|
||||
|
||||
/*
 * Shifts an array in place by adding a constant to every element.
 *
 * a  array to modify
 * n  number of leading elements to shift; nothing happens when n <= 0
 * s  amount added to each element
 */
void translate_array(double *a, int n, double s) {
    double *cursor = a;
    int remaining = n;

    while (remaining > 0) {
        *cursor += s;
        ++cursor;
        --remaining;
    }
}
|
||||
|
||||
/*
 * Scales an array in place by multiplying every element by a constant.
 *
 * a  array to modify
 * n  number of leading elements to scale; nothing happens when n <= 0
 * s  factor applied to each element
 */
void scale_array(double *a, int n, double s) {
    int k = 0;

    while (k < n) {
        a[k] = a[k] * s;
        ++k;
    }
}
|
||||
/*
 * Finds the position of the largest value in an array.
 *
 * a       array to search
 * n       number of leading elements to consider
 * returns the index of the largest element, or -1 when there is nothing to
 *         search (a is a null pointer or n <= 0).  On ties the earliest
 *         index wins, because a later element replaces the current best
 *         only when it is strictly greater.
 */
int max_index(double *a, int n)
{
    /* Reject a missing array as well as an empty one before touching a[0]. */
    if(!a || n <= 0) return -1;
    int i, best = 0;
    for(i = 1; i < n; ++i){
        if(a[i] > a[best]){
            best = i;
        }
    }
    return best;
}
|
||||
|
||||
double rand_normal()
|
||||
{
|
||||
int i;
|
||||
|
@ -15,6 +15,9 @@ char *copy_string(char *s);
|
||||
int count_fields(char *line);
|
||||
double *parse_fields(char *line, int n);
|
||||
void normalize_array(double *a, int n);
|
||||
/* Multiplies each of the first n elements of a by s, in place. */
void scale_array(double *a, int n, double s);
|
||||
/* Adds s to each of the first n elements of a, in place. */
void translate_array(double *a, int n, double s);
|
||||
/* Returns the index of the largest of the first n elements of a (the earliest one on ties), or -1 when n <= 0. */
int max_index(double *a, int n);
|
||||
double constrain(double a, double max);
|
||||
double rand_normal();
|
||||
double mean_array(double *a, int n);
|
||||
|
Loading…
Reference in New Issue
Block a user