Nist NIN testing multi-crop

2023-08-10 21:13:14 +03:00 · 2014-08-11 12:52:07 -07:00
parent 7add111509
commit 176d65b765
11 changed files with 288 additions and 51 deletions
--- a/2
+++ b/2
@ -25,7 +25,7 @@ VPATH=./src/
 EXEC=cnn
 OBJDIR=./obj/

-OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o
+OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o crop_layer.o
 OBJS = $(addprefix $(OBJDIR), $(OBJ))

 all: $(EXEC)
--- a/src/cnn.c
+++ b/src/cnn.c
@ -240,9 +240,22 @@ void test_full()

 void test_cifar10()
 {
-	srand(222222);
+
+    network net = parse_network_cfg("cfg/cifar10_part5.cfg");
+    data test = load_cifar10_data("data/cifar10/test_batch.bin");
+        clock_t start = clock(), end;
+    float test_acc = network_accuracy(net, test);
+        end = clock();
+    printf("%f in %f Sec\n", test_acc, (float)(end-start)/CLOCKS_PER_SEC);
+    visualize_network(net);
+    cvWaitKey(0);
+}
+
+void train_cifar10()
+{
+    srand(555555);
    network net = parse_network_cfg("cfg/cifar10.cfg");
-    //data test = load_cifar10_data("data/cifar10/test_batch.bin");
+    data test = load_cifar10_data("data/cifar10/test_batch.bin");
    int count = 0;
    int iters = 10000/net.batch;
    data train = load_all_cifar10();
@ -250,12 +263,20 @@ void test_cifar10()
        clock_t start = clock(), end;
        float loss = train_network_sgd(net, train, iters);
        end = clock();
-        //visualize_network(net);
-        //cvWaitKey(1000);
+        visualize_network(net);
+        cvWaitKey(5000);

        //float test_acc = network_accuracy(net, test);
        //printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
-        printf("%d: Loss: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, (float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+        if(count%10 == 0){
+            float test_acc = network_accuracy(net, test);
+            printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+            char buff[256];
+            sprintf(buff, "/home/pjreddie/cifar/cifar2_%d.cfg", count);
+            save_network(net, buff);
+        }else{
+            printf("%d: Loss: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, (float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+        }
    }
    free_data(train);
 }
@ -292,13 +313,25 @@ void test_nist_single()
 void test_nist()
 {
    srand(222222);
-    network net = parse_network_cfg("cfg/nist.cfg");
+    network net = parse_network_cfg("cfg/nist_final.cfg");
+    data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10);
+    translate_data_rows(test, -144);
+    clock_t start = clock(), end;
+    float test_acc = network_accuracy_multi(net, test,16);
+    end = clock();
+    printf("Accuracy: %f, Time: %lf seconds\n", test_acc,(float)(end-start)/CLOCKS_PER_SEC);
+}
+
+void train_nist()
+{
+    srand(222222);
+    network net = parse_network_cfg("cfg/nist_final.cfg");
    data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
    data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10);
-	translate_data_rows(train, -144);
-	//scale_data_rows(train, 1./128);
-	translate_data_rows(test, -144);
-	//scale_data_rows(test, 1./128);
+    translate_data_rows(train, -144);
+    //scale_data_rows(train, 1./128);
+    translate_data_rows(test, -144);
+    //scale_data_rows(test, 1./128);
    //randomize_data(train);
    int count = 0;
    //clock_t start = clock(), end;
@ -311,12 +344,12 @@ void test_nist()
        //float test_acc = 0;
        printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
        /*printf("%f %f %f %f %f\n", mean_array(get_network_output_layer(net,0), 100),
-        mean_array(get_network_output_layer(net,1), 100),
-        mean_array(get_network_output_layer(net,2), 100),
-        mean_array(get_network_output_layer(net,3), 100),
-        mean_array(get_network_output_layer(net,4), 100));
-        */
-        //save_network(net, "cfg/nist_basic_trained.cfg");
+          mean_array(get_network_output_layer(net,1), 100),
+          mean_array(get_network_output_layer(net,2), 100),
+          mean_array(get_network_output_layer(net,3), 100),
+          mean_array(get_network_output_layer(net,4), 100));
+         */
+        save_network(net, "cfg/nist_final2.cfg");

        //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay);
        //end = clock();
@ -778,6 +811,7 @@ int main(int argc, char *argv[])
    //test_nist_single();
    test_nist();
    //test_cifar10();
+    //train_cifar10();
    //test_vince();
    //test_full();
    //tune_VOC();
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@ -166,7 +166,7 @@ void learn_bias_convolutional_layer(convolutional_layer layer)
        *convolutional_out_width(layer);
    for(b = 0; b < layer.batch; ++b){
        for(i = 0; i < layer.n; ++i){
-            layer.bias_updates[i] += mean_array(layer.delta+size*(i+b*layer.n), size);
+            layer.bias_updates[i] += sum_array(layer.delta+size*(i+b*layer.n), size);
        }
    }
 }
--- a/src/convolutional_layer_gpu.c
+++ b/src/convolutional_layer_gpu.c
--- a/src/crop_layer.c
+++ b/src/crop_layer.c
@ -0,0 +1,57 @@
+#include "crop_layer.h"
+#include <stdio.h>
+
+/*
+ * Build an image view of the crop layer's output via float_to_image, using
+ * the crop height, crop width and channel count stored in the layer.
+ */
+image get_crop_image(crop_layer layer)
+{
+    return float_to_image(layer.crop_height, layer.crop_width, layer.c, layer.output);
+}
+
+/*
+ * Allocate and initialise a crop layer.
+ *
+ * batch                    number of images handled per forward pass
+ * h, w, c                  input height, width and channel count
+ * crop_height, crop_width  size of the window copied out of each input
+ * flip                     stored as-is; forward_crop_layer consults it to
+ *                          decide whether a horizontal mirror may be applied
+ *
+ * A one-line summary of the layer shape is written to stderr.
+ *
+ * Returns a heap-allocated layer whose output and delta buffers each hold
+ * crop_width*crop_height*c*batch zero-initialised floats. Returns 0 if any
+ * allocation fails (nothing is leaked in that case).
+ */
+crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip)
+{
+    fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
+    crop_layer *layer = calloc(1, sizeof(crop_layer));
+    if(!layer) return 0;
+    layer->batch = batch;
+    layer->h = h;
+    layer->w = w;
+    layer->c = c;
+    layer->flip = flip;
+    layer->crop_width = crop_width;
+    layer->crop_height = crop_height;
+    int outputs = crop_width*crop_height * c*batch;
+    layer->output = calloc(outputs, sizeof(float));
+    layer->delta = calloc(outputs, sizeof(float));
+    /* calloc(0, ...) may legitimately return a null pointer, so only treat a
+     * null buffer as a failure when a non-empty buffer was requested. */
+    if(outputs > 0 && (!layer->output || !layer->delta)){
+        free(layer->output);
+        free(layer->delta);
+        free(layer);
+        return 0;
+    }
+    return layer;
+}
+/*
+ * Copy one randomly positioned crop_height x crop_width window out of every
+ * channel of every image in `input` into layer.output.
+ *
+ * One (dh, dw) offset and one flip decision are drawn with rand() per call
+ * and shared by the whole batch. Offsets cover [0, h - crop_height] and
+ * [0, w - crop_width] inclusive, so a crop the same size as the input is
+ * valid and always lands at offset 0 (the previous `rand() % (h - crop_height)`
+ * divided by zero in that case and could never pick the last valid offset).
+ *
+ * When layer.flip is nonzero and rand()%2 is nonzero, each row is written in
+ * reverse column order (horizontal mirror).
+ *
+ * input is read at j + w*(i + h*(c + layer.c*b)); output is written densely
+ * in (batch, channel, row, column) order.
+ *
+ * If the crop is larger than the input, nothing is copied and a message is
+ * written to stderr, because any offset would read outside `input`.
+ */
+void forward_crop_layer(const crop_layer layer, float *input)
+{
+    int i,j,c,b;
+    int max_dh = layer.h - layer.crop_height;
+    int max_dw = layer.w - layer.crop_width;
+    if(max_dh < 0 || max_dw < 0){
+        fprintf(stderr, "Crop Layer: crop %d x %d exceeds input %d x %d\n",
+                layer.crop_height, layer.crop_width, layer.h, layer.w);
+        return;
+    }
+    /* Keep the draw order (row offset, column offset, then flip) unchanged. */
+    int dh = rand()%(max_dh + 1);
+    int dw = rand()%(max_dw + 1);
+    int mirror = layer.flip && rand()%2;
+    int count = 0;
+    for(b = 0; b < layer.batch; ++b){
+        for(c = 0; c < layer.c; ++c){
+            for(i = dh; i < dh+layer.crop_height; ++i){
+                for(j = 0; j < layer.crop_width; ++j){
+                    /* Mirrored rows walk the window from its right edge to its left. */
+                    int col = mirror ? dw + layer.crop_width - 1 - j : dw + j;
+                    int index = col+layer.w*(i+layer.h*(c + layer.c*b));
+                    layer.output[count++] = input[index];
+                }
+            }
+        }
+    }
+}
+
--- a/src/crop_layer.h
+++ b/src/crop_layer.h
@ -0,0 +1,22 @@
+#ifndef CROP_LAYER_H
+#define CROP_LAYER_H
+
+#include "image.h"
+
+/* State for a layer that copies a crop_height x crop_width window out of
+ * each h x w x c input image (see forward_crop_layer). */
+typedef struct {
+    int batch;          /* number of images processed per forward pass */
+    int h,w,c;          /* input height, width and channel count */
+    int crop_width;     /* width of the copied window */
+    int crop_height;    /* height of the copied window */
+    int flip;           /* nonzero: forward pass may mirror the window horizontally */
+    float *delta;       /* crop_width*crop_height*c*batch floats, allocated by make_crop_layer */
+    float *output;      /* crop_width*crop_height*c*batch floats, filled by forward_crop_layer */
+} crop_layer;
+
+/* Image view of the layer's output buffer (crop_height x crop_width x c). */
+image get_crop_image(crop_layer layer);
+/* Allocate a crop layer together with its output and delta buffers. */
+crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip);
+/* Copy a randomly positioned (optionally mirrored) window of input into layer.output. */
+void forward_crop_layer(const crop_layer layer, float *input);
+/* NOTE(review): no definition of backward_crop_layer appears in crop_layer.c
+ * as added by this commit - confirm it is defined elsewhere before calling it. */
+void backward_crop_layer(const crop_layer layer, float *input, float *delta);
+
+#endif
+
--- a/src/network.c
+++ b/src/network.c
@ -4,6 +4,7 @@
 #include "data.h"
 #include "utils.h"

+#include "crop_layer.h"
 #include "connected_layer.h"
 #include "convolutional_layer.h"
 #include "maxpool_layer.h"
@ -56,6 +57,11 @@ void forward_network(network net, float *input, int train)
            forward_softmax_layer(layer, input);
            input = layer.output;
        }
+        else if(net.types[i] == CROP){
+            crop_layer layer = *(crop_layer *)net.layers[i];
+            forward_crop_layer(layer, input);
+            input = layer.output;
+        }
        else if(net.types[i] == MAXPOOL){
            maxpool_layer layer = *(maxpool_layer *)net.layers[i];
            forward_maxpool_layer(layer, input);
@ -85,6 +91,11 @@ void forward_network(network net, float *input, int train)
            forward_connected_layer(layer, input);
            input = layer.output;
        }
+        else if(net.types[i] == CROP){
+            crop_layer layer = *(crop_layer *)net.layers[i];
+            forward_crop_layer(layer, input);
+            input = layer.output;
+        }
        else if(net.types[i] == SOFTMAX){
            softmax_layer layer = *(softmax_layer *)net.layers[i];
            forward_softmax_layer(layer, input);
@ -266,12 +277,14 @@ float train_network_sgd(network net, data d, int n)

    int i,j;
    float sum = 0;
+    int index = 0;
    for(i = 0; i < n; ++i){
        for(j = 0; j < batch; ++j){
-            int index = rand()%d.X.rows;
+            index = rand()%d.X.rows;
            memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float));
            memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float));
        }
+
        float err = train_network_datum(net, X, y);
        sum += err;
        //train_network_datum(net, X, y);
@ -300,6 +313,7 @@ float train_network_sgd(network net, data d, int n)
        //}
    }
    //printf("Accuracy: %f\n",(float) correct/n);
+    //show_image(float_to_image(32,32,3,X), "Orig");
    free(X);
    free(y);
    return (float)sum/(n*batch);
@ -446,6 +460,10 @@ image get_network_image_layer(network net, int i)
        normalization_layer layer = *(normalization_layer *)net.layers[i];
        return get_normalization_image(layer);
    }
+    else if(net.types[i] == CROP){
+        crop_layer layer = *(crop_layer *)net.layers[i];
+        return get_crop_image(layer);
+    }
    return make_empty_image(0,0,0);
 }

@ -464,6 +482,7 @@ void visualize_network(network net)
    image *prev = 0;
    int i;
    char buff[256];
+    show_image(get_network_image_layer(net, 0), "Crop");
    for(i = 0; i < net.n; ++i){
        sprintf(buff, "Layer %d", i);
        if(net.types[i] == CONVOLUTIONAL){
@ -484,6 +503,31 @@ float *network_predict(network net, float *input)
    return out;
 }

+/*
+ * Predict every row of test.X n times and average the network outputs.
+ *
+ * Rows are staged net.batch at a time into a scratch buffer and passed to
+ * network_predict n times; out/n is accumulated into pred on each pass, so
+ * pred ends up holding the mean output over the n passes. Presumably this is
+ * meant for networks with a stochastic layer (e.g. the random crop layer),
+ * where each pass sees a different view of the same inputs.
+ *
+ * The final group may be partial: slots past the last row keep whatever the
+ * previous group left in the buffer, and their outputs are ignored.
+ *
+ * Returns a test.X.rows x k matrix (k = get_network_output_size(net)); the
+ * caller releases it with free_matrix. The accumulation relies on make_matrix
+ * returning zeroed storage - TODO confirm.
+ */
+matrix network_predict_data_multi(network net, data test, int n)
+{
+    int i,j,b,m;
+    int k = get_network_output_size(net);
+    matrix pred = make_matrix(test.X.rows, k);
+    /* One slot of test.X.cols floats per batch entry. Sizing this by
+     * test.X.rows (as before) overflows whenever cols > rows. */
+    float *X = calloc(net.batch*test.X.cols, sizeof(float));
+    for(i = 0; i < test.X.rows; i += net.batch){
+        for(b = 0; b < net.batch; ++b){
+            if(i+b == test.X.rows) break;
+            memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
+        }
+        for(m = 0; m < n; ++m){
+            float *out = network_predict(net, X);
+            for(b = 0; b < net.batch; ++b){
+                if(i+b == test.X.rows) break;
+                for(j = 0; j < k; ++j){
+                    pred.vals[i+b][j] += out[j+b*k]/n;
+                }
+            }
+        }
+    }
+    free(X);
+    return pred;
+}
+
 matrix network_predict_data(network net, data test)
 {
    int i,j,b;
@ -525,6 +569,12 @@ void print_network(network net)
            image m = get_maxpool_image(layer);
            n = m.h*m.w*m.c;
        }
+        else if(net.types[i] == CROP){
+            crop_layer layer = *(crop_layer *)net.layers[i];
+            output = layer.output;
+            image m = get_crop_image(layer);
+            n = m.h*m.w*m.c;
+        }
        else if(net.types[i] == CONNECTED){
            connected_layer layer = *(connected_layer *)net.layers[i];
            output = layer.output;
@ -553,4 +603,12 @@ float network_accuracy(network net, data d)
    return acc;
 }

+/*
+ * Accuracy of net on d using predictions averaged over n passes: obtains the
+ * averaged predictions from network_predict_data_multi, scores them against
+ * d.y with matrix_accuracy, and frees the prediction matrix before returning.
+ */
+float network_accuracy_multi(network net, data d, int n)
+{
+    matrix averaged = network_predict_data_multi(net, d, n);
+    float result = matrix_accuracy(d.y, averaged);
+    free_matrix(averaged);
+    return result;
+}
+

--- a/src/network.h
+++ b/src/network.h
@ -12,7 +12,8 @@ typedef enum {
    MAXPOOL,
    SOFTMAX,
    NORMALIZATION,
-    DROPOUT
+    DROPOUT,
+    CROP
 } LAYER_TYPE;

 typedef struct {
@ -41,6 +42,7 @@ float train_network_batch(network net, data d, int n);
 void train_network(network net, data d);
 matrix network_predict_data(network net, data test);
 float network_accuracy(network net, data d);
+float network_accuracy_multi(network net, data d, int n);
 float *get_network_output(network net);
 float *get_network_output_layer(network net, int i);
 float *get_network_delta_layer(network net, int i);
--- a/src/parser.c
+++ b/src/parser.c
@ -4,6 +4,7 @@

 #include "parser.h"
 #include "activations.h"
+#include "crop_layer.h"
 #include "convolutional_layer.h"
 #include "connected_layer.h"
 #include "maxpool_layer.h"
@ -24,6 +25,7 @@ int is_connected(section *s);
 int is_maxpool(section *s);
 int is_dropout(section *s);
 int is_softmax(section *s);
+int is_crop(section *s);
 int is_normalization(section *s);
 list *read_cfg(char *filename);

@ -43,6 +45,22 @@ void free_section(section *s)
    free(s);
 }

+/*
+ * Parse up to n comma-separated floats from `data` into a[0..n-1].
+ *
+ * data  NUL-terminated string, modified in place: every delimiter that is
+ *       consumed is overwritten with '\0'. A null pointer is a no-op.
+ * a     destination array with room for at least n floats.
+ * n     maximum number of fields to read.
+ *
+ * Parsing stops at the end of the string or after n fields, whichever comes
+ * first. A field that sscanf cannot convert (for example the empty field
+ * after a trailing comma) leaves the corresponding a[i] untouched.
+ */
+void parse_data(char *data, float *a, int n)
+{
+    int i;
+    if(!data) return;
+    char *curr = data;
+    int done = 0;
+    for(i = 0; i < n && !done; ++i){
+        /* Scan from the start of the field and test before advancing, so an
+         * empty string or empty field never steps past the terminator. */
+        char *next = curr;
+        while(*next != '\0' && *next != ',') ++next;
+        if(*next == '\0') done = 1;
+        *next = '\0';
+        sscanf(curr, "%g", &a[i]);
+        curr = next+1;
+    }
+}
+
 convolutional_layer *parse_convolutional(list *options, network *net, int count)
 {
    int i;
@ -95,30 +113,8 @@ convolutional_layer *parse_convolutional(list *options, network *net, int count)
    }
    char *weights = option_find_str(options, "weights", 0);
    char *biases = option_find_str(options, "biases", 0);
-    if(biases){
-        char *curr = biases;
-        char *next = biases;
-        int done = 0;
-        for(i = 0; i < n && !done; ++i){
-            while(*++next !='\0' && *next != ',');
-            if(*next == '\0') done = 1;
-            *next = '\0';
-            sscanf(curr, "%g", &layer->biases[i]);
-            curr = next+1;
-        }
-    }
-    if(weights){
-        char *curr = weights;
-        char *next = weights;
-        int done = 0;
-        for(i = 0; i < c*n*size*size && !done; ++i){
-            while(*++next !='\0' && *next != ',');
-            if(*next == '\0') done = 1;
-            *next = '\0';
-            sscanf(curr, "%g", &layer->filters[i]);
-            curr = next+1;
-        }
-    }
+    parse_data(biases, layer->biases, n);
+    parse_data(weights, layer->filters, c*n*size*size);
    option_unused(options);
    return layer;
 }
@ -164,6 +160,10 @@ connected_layer *parse_connected(list *options, network *net, int count)
            curr = next+1;
        }
    }
+    char *weights = option_find_str(options, "weights", 0);
+    char *biases = option_find_str(options, "biases", 0);
+    parse_data(biases, layer->biases, output);
+    parse_data(weights, layer->weights, input*output);
    option_unused(options);
    return layer;
 }
@ -182,6 +182,36 @@ softmax_layer *parse_softmax(list *options, network *net, int count)
    return layer;
 }

+/*
+ * Build a crop layer from a [crop] config section.
+ *
+ * crop_height, crop_width (default 1) and flip (default 0) are always read.
+ * When this is the first layer (count == 0) the input size comes from the
+ * height/width/channels options, and batch, learning_rate, momentum and decay
+ * are read and stored on the network. Otherwise the input size is the image
+ * shape of layer count-1, and error() is called if that shape has zero height.
+ */
+crop_layer *parse_crop(list *options, network *net, int count)
+{
+    int in_h, in_w, in_c;
+    int crop_height = option_find_int(options, "crop_height",1);
+    int crop_width = option_find_int(options, "crop_width",1);
+    int flip = option_find_int(options, "flip",0);
+    if(count != 0){
+        /* Not the first layer: inherit the input shape from the previous one. */
+        image prev = get_network_image_layer(*net, count-1);
+        in_h = prev.h;
+        in_w = prev.w;
+        in_c = prev.c;
+        if(in_h == 0) error("Layer before crop layer must output image.");
+    }else{
+        /* First layer: the section also carries the network-wide settings. */
+        in_h = option_find_int(options, "height",1);
+        in_w = option_find_int(options, "width",1);
+        in_c = option_find_int(options, "channels",1);
+        net->batch = option_find_int(options, "batch",1);
+        float rate = option_find_float(options, "learning_rate", .001);
+        float mom = option_find_float(options, "momentum", .9);
+        float wd = option_find_float(options, "decay", .0001);
+        net->learning_rate = rate;
+        net->momentum = mom;
+        net->decay = wd;
+    }
+    crop_layer *result = make_crop_layer(net->batch,in_h,in_w,in_c,crop_height,crop_width,flip);
+    option_unused(options);
+    return result;
+}
+
 maxpool_layer *parse_maxpool(list *options, network *net, int count)
 {
    int h,w,c;
@ -261,6 +291,10 @@ network parse_network_cfg(char *filename)
            connected_layer *layer = parse_connected(options, &net, count);
            net.types[count] = CONNECTED;
            net.layers[count] = layer;
+        }else if(is_crop(s)){
+            crop_layer *layer = parse_crop(options, &net, count);
+            net.types[count] = CROP;
+            net.layers[count] = layer;
        }else if(is_softmax(s)){
            softmax_layer *layer = parse_softmax(options, &net, count);
            net.types[count] = SOFTMAX;
@ -290,6 +324,10 @@ network parse_network_cfg(char *filename)
    return net;
 }

+/* Returns 1 when the section's type string is exactly "[crop]", else 0. */
+int is_crop(section *s)
+{
+    int differs = strcmp(s->type, "[crop]");
+    return differs == 0;
+}
 int is_convolutional(section *s)
 {
    return (strcmp(s->type, "[conv]")==0
@ -389,11 +427,11 @@ void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int
                l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay);
    } else {
        if(l->learning_rate != net.learning_rate)
-                fprintf(fp, "learning_rate=%g\n", l->learning_rate);
+            fprintf(fp, "learning_rate=%g\n", l->learning_rate);
        if(l->momentum != net.momentum)
-                fprintf(fp, "momentum=%g\n", l->momentum);
+            fprintf(fp, "momentum=%g\n", l->momentum);
        if(l->decay != net.decay)
-                fprintf(fp, "decay=%g\n", l->decay);
+            fprintf(fp, "decay=%g\n", l->decay);
    }
    fprintf(fp, "filters=%d\n"
            "size=%d\n"
@ -432,12 +470,30 @@ void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count)
            "activation=%s\n",
            l->outputs,
            get_activation_string(l->activation));
-    fprintf(fp, "data=");
+    fprintf(fp, "biases=");
    for(i = 0; i < l->outputs; ++i) fprintf(fp, "%g,", l->biases[i]);
-    for(i = 0; i < l->inputs*l->outputs; ++i) fprintf(fp, "%g,", l->weights[i]);
+    fprintf(fp, "\n");
+    fprintf(fp, "weights=");
+    for(i = 0; i < l->outputs*l->inputs; ++i) fprintf(fp, "%g,", l->weights[i]);
    fprintf(fp, "\n\n");
 }

+/*
+ * Write a [crop] section for layer l to fp.
+ *
+ * For the first layer (count == 0) the batch size, the input height, width
+ * and channels, and the network's learning_rate, momentum and decay are
+ * written too. crop_height, crop_width and flip are always written, followed
+ * by a blank line.
+ */
+void print_crop_cfg(FILE *fp, crop_layer *l, network net, int count)
+{
+    int is_first = (count == 0);
+    fprintf(fp, "[crop]\n");
+    if(is_first){
+        fprintf(fp,   "batch=%d\n"
+                "height=%d\n"
+                "width=%d\n"
+                "channels=%d\n"
+                "learning_rate=%g\n"
+                "momentum=%g\n"
+                "decay=%g\n",
+                l->batch, l->h, l->w, l->c,
+                net.learning_rate, net.momentum, net.decay);
+    }
+    fprintf(fp, "crop_height=%d\ncrop_width=%d\nflip=%d\n\n",
+            l->crop_height, l->crop_width, l->flip);
+}
+
 void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count)
 {
    fprintf(fp, "[maxpool]\n");
@ -481,6 +537,8 @@ void save_network(network net, char *filename)
            print_convolutional_cfg(fp, (convolutional_layer *)net.layers[i], net, i);
        else if(net.types[i] == CONNECTED)
            print_connected_cfg(fp, (connected_layer *)net.layers[i], net, i);
+        else if(net.types[i] == CROP)
+            print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i);
        else if(net.types[i] == MAXPOOL)
            print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i);
        else if(net.types[i] == NORMALIZATION)
--- a/src/utils.c
+++ b/src/utils.c
@ -143,12 +143,17 @@ float *parse_fields(char *line, int n)
 	return field;
 }

-float mean_array(float *a, int n)
+float sum_array(float *a, int n)
 {
    int i;
    float sum = 0;
    for(i = 0; i < n; ++i) sum += a[i];
-    return sum/n;
+    return sum;
+}
+
+float mean_array(float *a, int n)
+{
+    return sum_array(a,n)/n;
 }

 float variance_array(float *a, int n)
--- a/src/utils.h
+++ b/src/utils.h
@ -21,6 +21,7 @@ int max_index(float *a, int n);
 float constrain(float a, float max);
 float rand_normal();
 float rand_uniform();
+float sum_array(float *a, int n);
 float mean_array(float *a, int n);
 float variance_array(float *a, int n);
 float **one_hot_encode(float *a, int n, int k);