From d9f1b0b16edeb59281355a855e18a8be343fc33c Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Fri, 8 Aug 2014 12:04:15 -0700
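Subject: [PATCH] probably how maxpool layers should be

Besides the maxpool_layer changes, this patch also:
- removes the dropout argument from activate(), activate_array(),
  activate_array_ongpu() and the OpenCL activate_array kernel, and adds
  src/dropout_layer.c and src/dropout_layer.h;
- drops the learning rate / momentum / decay arguments from the
  train_network_sgd() and update_network() call sites in src/cnn.c;
- removes -Werror from COMMON in the Makefile;
- re-indents most of src/cnn.c from tabs to spaces.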

---
 Makefile                      |   6 +-
 src/activations.c             |  21 +-
 src/activations.cl            |  17 +-
 src/activations.h             |   6 +-
 src/cnn.c                     | 974 +++++++++++++++++-----------------
 src/col2im.c                  |   8 +-
 src/connected_layer.c         |  22 +-
 src/connected_layer.h         |  12 +-
 src/convolutional_layer.c     |  75 +--
 src/convolutional_layer.h     |   8 +-
 src/convolutional_layer_gpu.c |   0
 src/data.c                    |  37 ++
 src/data.h                    |   1 +
 src/dropout_layer.c           |  26 +
 src/dropout_layer.h           |  15 +
 src/im2col.c                  |  32 +-
 src/im2col.cl                 |   6 +-
 src/image.c                   |   4 +-
 src/maxpool_layer.c           |  79 ++-
 src/maxpool_layer.h           |   3 +-
 src/mini_blas.h               |   2 +-
 src/network.c                 | 130 +----
 src/network.h                 |  15 +-
 src/normalization_layer.c     |   2 +-
 src/opencl.c                  |   9 +
 src/opencl.h                  |   1 +
 src/option_list.c             |   7 +
 src/option_list.h             |   1 +
 src/parser.c                  | 245 +++++++--
 src/parser.h                  |   1 +
 src/softmax_layer.c           |  24 +
 src/softmax_layer.h           |   1 +
 32 files changed, 1044 insertions(+), 746 deletions(-)
 create mode 100644 src/convolutional_layer_gpu.c
 create mode 100644 src/dropout_layer.c
 create mode 100644 src/dropout_layer.h

diff --git a/Makefile b/Makefile
index 9c3043b0..877fc7f0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 CC=gcc
 GPU=0
-COMMON=-Wall -Werror -Wfatal-errors `pkg-config --cflags opencv` -I/usr/local/cuda/include/
+COMMON=-Wall -Wfatal-errors `pkg-config --cflags opencv` -I/usr/local/cuda/include/
 ifeq ($(GPU), 1) 
 COMMON+=-DGPU
 else
@@ -19,13 +19,13 @@ LDFLAGS= -lOpenCL
 endif
 endif
 CFLAGS= $(COMMON) $(OPTS)
-#CFLAGS= $(COMMON) -O0 -g 
+#CFLAGS= $(COMMON) -O0 -g
 LDFLAGS+=`pkg-config --libs opencv` -lm
 VPATH=./src/
 EXEC=cnn
 OBJDIR=./obj/
 
-OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o
+OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o
 OBJS = $(addprefix $(OBJDIR), $(OBJ))
 
 all: $(EXEC)
diff --git a/src/activations.c b/src/activations.c
index 3b117166..04b27c92 100644
--- a/src/activations.c
+++ b/src/activations.c
@@ -41,29 +41,28 @@ float relu_activate(float x){return x*(x>0);}
 float ramp_activate(float x){return x*(x>0)+.1*x;}
 float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
 
-float activate(float x, ACTIVATION a, float dropout)
+float activate(float x, ACTIVATION a) // applies the activation selected by a to x; returns 0 if a matches no case
 {
-    if(dropout && (float)rand()/RAND_MAX < dropout) return 0;
     switch(a){
         case LINEAR:
-            return linear_activate(x)/(1-dropout);
+            return linear_activate(x); // the former division by (1-dropout) is gone from every case
         case SIGMOID:
-            return sigmoid_activate(x)/(1-dropout);
+            return sigmoid_activate(x);
         case RELU:
-            return relu_activate(x)/(1-dropout);
+            return relu_activate(x);
         case RAMP:
-            return ramp_activate(x)/(1-dropout);
+            return ramp_activate(x);
         case TANH:
-            return tanh_activate(x)/(1-dropout);
+            return tanh_activate(x);
     }
     return 0;
 }
 
-void activate_array(float *x, const int n, const ACTIVATION a, float dropout)
+void activate_array(float *x, const int n, const ACTIVATION a) // overwrites x[0..n-1] in place with activate(x[i], a)
 {
     int i;
     for(i = 0; i < n; ++i){
-        x[i] = activate(x[i], a, dropout);
+        x[i] = activate(x[i], a); // dropout is no longer threaded through here
     }
 }
 
@@ -109,7 +108,7 @@ cl_kernel get_activation_kernel()
 }
 
 
-void activate_array_ongpu(cl_mem x, int n, ACTIVATION a, float dropout) 
+void activate_array_ongpu(cl_mem x, int n, ACTIVATION a) 
 {
     cl_setup();
     cl_kernel kernel = get_activation_kernel();
@@ -119,8 +118,6 @@ void activate_array_ongpu(cl_mem x, int n, ACTIVATION a, float dropout)
     cl.error = clSetKernelArg(kernel, i++, sizeof(x), (void*) &x);
     cl.error = clSetKernelArg(kernel, i++, sizeof(n), (void*) &n);
     cl.error = clSetKernelArg(kernel, i++, sizeof(a), (void*) &a);
-    cl.error = clSetKernelArg(kernel, i++, sizeof(dropout), 
-        (void*) &dropout);
     check_error(cl);
 
     size_t gsize = n;
diff --git a/src/activations.cl b/src/activations.cl
index 6ab135a1..65131c55 100644
--- a/src/activations.cl
+++ b/src/activations.cl
@@ -8,27 +8,26 @@ float relu_activate(float x){return x*(x>0);}
 float ramp_activate(float x){return x*(x>0)+.1*x;}
 float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
 
-float activate(float x, ACTIVATION a, float dropout)
+float activate(float x, ACTIVATION a) // OpenCL-side counterpart: applies the activation selected by a; returns 0 if a matches no case
 {
-    //if((float)rand()/RAND_MAX < dropout) return 0;
     switch(a){
         case LINEAR:
-            return linear_activate(x)/(1-dropout);
+            return linear_activate(x); // the former division by (1-dropout) is gone from every case
         case SIGMOID:
-            return sigmoid_activate(x)/(1-dropout);
+            return sigmoid_activate(x);
         case RELU:
-            return relu_activate(x)/(1-dropout);
+            return relu_activate(x);
         case RAMP:
-            return ramp_activate(x)/(1-dropout);
+            return ramp_activate(x);
         case TANH:
-            return tanh_activate(x)/(1-dropout);
+            return tanh_activate(x);
     }
     return 0;
 }
 
 __kernel void activate_array(__global float *x,
-    const int n, const ACTIVATION a, const float dropout)
+    const int n, const ACTIVATION a) // n is accepted but never read in the kernel body
 {
     int i = get_global_id(0);
-    x[i] = activate(x[i], a, dropout);
+    x[i] = activate(x[i], a); // each work-item rewrites the element at its global id; there is no i < n guard, so the launch size must not exceed the buffer length
 }
diff --git a/src/activations.h b/src/activations.h
index e47914c5..8c4287e0 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -9,12 +9,12 @@ typedef enum{
 ACTIVATION get_activation(char *s);
 
 char *get_activation_string(ACTIVATION a);
-float activate(float x, ACTIVATION a, float dropout);
+float activate(float x, ACTIVATION a);
 float gradient(float x, ACTIVATION a);
 void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
-void activate_array(float *x, const int n, const ACTIVATION a, float dropout);
+void activate_array(float *x, const int n, const ACTIVATION a);
 #ifdef GPU
-void activate_array_ongpu(cl_mem x, int n, ACTIVATION a, float dropout);
+void activate_array_ongpu(cl_mem x, int n, ACTIVATION a);
 #endif
 
 #endif
diff --git a/src/cnn.c b/src/cnn.c
index cac11494..f8661942 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -51,7 +51,7 @@ void test_convolve_matrix()
 	int i;
 	clock_t start = clock(), end;
 	for(i = 0; i < 1000; ++i){
-		im2col_cpu(dog.data, dog.c,  dog.h,  dog.w,  size,  stride, 0, matrix);
+		im2col_cpu(dog.data,1, dog.c,  dog.h,  dog.w,  size,  stride, 0, matrix);
 		gemm(0,0,n,mw,mh,1,filters,mh,matrix,mw,1,edge.data,mw);
 	}
 	end = clock();
@@ -75,7 +75,7 @@ void verify_convolutional_layer()
 	int size = 3;
 	float eps = .00000001;
 	image test = make_random_image(5,5, 1);
-	convolutional_layer layer = *make_convolutional_layer(1,test.h,test.w,test.c, n, size, stride, 0, RELU);
+	convolutional_layer layer = *make_convolutional_layer(1,test.h,test.w,test.c, n, size, stride, 0, RELU,0,0,0);
 	image out = get_convolutional_image(layer);
 	float **jacobian = calloc(test.h*test.w*test.c, sizeof(float));
 
@@ -158,25 +158,10 @@ void test_rotate()
 
 void test_parser()
 {
-	network net = parse_network_cfg("test_parser.cfg");
-	float input[1];
-	int count = 0;
-
-	float avgerr = 0;
-	while(++count < 100000000){
-		float v = ((float)rand()/RAND_MAX);
-		float truth = v*v;
-		input[0] = v;
-		forward_network(net, input, 1);
-		float *out = get_network_output(net);
-		float *delta = get_network_delta(net);
-		float err = pow((out[0]-truth),2.);
-		avgerr = .99 * avgerr + .01 * err;
-		if(count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
-		delta[0] = truth - out[0];
-		backward_network(net, input, &truth);
-		update_network(net, .001,0,0);
-	}
+	network net = parse_network_cfg("cfg/test_parser.cfg"); // round trip: parse, save, re-parse the saved file, save again
+    save_network(net, "cfg/test_parser_1.cfg");
+	network net2 = parse_network_cfg("cfg/test_parser_1.cfg"); // reads back the file written on the previous line
+    save_network(net2, "cfg/test_parser_2.cfg"); // nothing here compares the two saved files; presumably they are diffed by hand
 }
 
 void test_data()
@@ -206,7 +191,7 @@ void train_full()
 		//scale_data_rows(train, 1./255.);
 		normalize_data_rows(train);
 		clock_t start = clock(), end;
-		float loss = train_network_sgd(net, train, 1000, lr, momentum, decay);
+		float loss = train_network_sgd(net, train, 1000);
 		end = clock();
 		printf("%d: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", i, loss, (float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
 		free_data(train);
@@ -255,558 +240,567 @@ void test_full()
 
 void test_cifar10()
 {
-	data test = load_cifar10_data("images/cifar10/test_batch.bin");
-	scale_data_rows(test, 1./255);
-	network net = parse_network_cfg("cfg/cifar10.cfg");
-	int count = 0;
-	float lr = .000005;
-	float momentum = .99;
-	float decay = 0.001;
-	decay = 0;
-	int batch = 10000;
-	while(++count <= 10000){
-		char buff[256];
-		sprintf(buff, "images/cifar10/data_batch_%d.bin", rand()%5+1);
-		data train = load_cifar10_data(buff);
-		scale_data_rows(train, 1./255);
-		train_network_sgd(net, train, batch, lr, momentum, decay);
-		//printf("%5f %5f\n",(double)count*batch/train.X.rows, loss);
-
-		float test_acc = network_accuracy(net, test);
-		printf("%5f %5f\n",(double)count*batch/train.X.rows/5, 1-test_acc);
-		free_data(train);
-	}
+	srand(222222); // fixed seed
+    network net = parse_network_cfg("cfg/cifar10.cfg");
+    //data test = load_cifar10_data("data/cifar10/test_batch.bin");
+    int count = 0;
+    int iters = 10000/net.batch; // integer division: any remainder is dropped
+    data train = load_all_cifar10(); // loaded once up front. NOTE(review): the 1./255 scaling done by the old loop is not applied here; confirm load_all_cifar10 covers it
+    while(++count <= 10000){
+        clock_t start = clock(), end;
+        float loss = train_network_sgd(net, train, iters); // learning rate, momentum and decay are no longer arguments
+        end = clock();
+        //visualize_network(net);
+        //cvWaitKey(1000);
 
+        //float test_acc = network_accuracy(net, test);
+        //printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+        printf("%d: Loss: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, (float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+    }
+    free_data(train); // released once, after the loop
 }
 
 void test_vince()
 {
-	network net = parse_network_cfg("cfg/vince.cfg");
-	data train = load_categorical_data_csv("images/vince.txt", 144, 2);
-	normalize_data_rows(train);
+    network net = parse_network_cfg("cfg/vince.cfg");
+    data train = load_categorical_data_csv("images/vince.txt", 144, 2);
+    normalize_data_rows(train);
+
+    int count = 0;
+    //float lr = .00005;
+    //float momentum = .9;
+    //float decay = 0.0001;
+    //decay = 0;
+    int batch = 10000;
+    while(++count <= 10000){
+        float loss = train_network_sgd(net, train, batch); // the hyperparameters commented out above are no longer passed in
+        printf("%5f %5f\n",(double)count*batch/train.X.rows, loss); // first column is count*batch divided by the number of training rows
+    }
+}
+
+void test_nist_single() // single short training call on one CSV, then prints the loss
+{
+    srand(222222); // fixed seed
+    network net = parse_network_cfg("cfg/nist.cfg");
+    data train = load_categorical_data_csv("data/mnist/mnist_tiny.csv", 0, 10);
+    normalize_data_rows(train);
+    float loss = train_network_sgd(net, train, 5); // one call only; there is no surrounding loop
+    printf("Loss: %f, LR: %f, Momentum: %f, Decay: %f\n", loss, net.learning_rate, net.momentum, net.decay); // reports the hyperparameters stored on net
 
-	int count = 0;
-	float lr = .00005;
-	float momentum = .9;
-	float decay = 0.0001;
-	decay = 0;
-	int batch = 10000;
-	while(++count <= 10000){
-		float loss = train_network_sgd(net, train, batch, lr, momentum, decay);
-		printf("%5f %5f\n",(double)count*batch/train.X.rows, loss);
-	}
 }
 
 void test_nist()
 {
-	srand(222222);
-	network net = parse_network_cfg("cfg/nist.cfg");
-	data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
-	data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10);
-	normalize_data_rows(train);
-	normalize_data_rows(test);
-	//randomize_data(train);
-	int count = 0;
-	float lr = .0001;
-	float momentum = .9;
-	float decay = 0.0001;
-	//clock_t start = clock(), end;
-	int iters = 1000;
-	while(++count <= 10){
-		clock_t start = clock(), end;
-		float loss = train_network_sgd(net, train, iters, lr, momentum, decay);
-		end = clock();
-		float test_acc = network_accuracy(net, test);
+    srand(222222); // fixed seed
+    network net = parse_network_cfg("cfg/nist.cfg");
+    data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
+    data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10);
+	translate_data_rows(train, -144); // replaces normalize_data_rows: translate by -144, then scale by 1./128
+	scale_data_rows(train, 1./128);
+	translate_data_rows(test, -144); // same two steps applied to the test set
+	scale_data_rows(test, 1./128);
+    //randomize_data(train);
+    int count = 0;
+    //clock_t start = clock(), end;
+    int iters = 10000/net.batch; // integer division: any remainder is dropped
+    while(++count <= 100){
+        clock_t start = clock(), end;
+        float loss = train_network_sgd(net, train, iters); // learning rate, momentum and decay are no longer arguments
+        end = clock();
+        float test_acc = network_accuracy(net, test);
         //float test_acc = 0;
-		printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
+        printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+        //save_network(net, "cfg/nist_basic_trained.cfg");
 
-		//printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay);
-		//end = clock();
-		//printf("Time: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC);
-		//start=end;
-		//lr *= .5;
-	}
-	//save_network(net, "cfg/nist_basic_trained.cfg");
+        //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay);
+        //end = clock();
+        //printf("Time: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC);
+        //start=end;
+        //lr *= .5;
+    }
+    //save_network(net, "cfg/nist_basic_trained.cfg");
 }
 
 void test_ensemble()
 {
-	int i;
-	srand(888888);
-	data d = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
-	normalize_data_rows(d);
-	data test = load_categorical_data_csv("mnist/mnist_test.csv", 0,10);
-	normalize_data_rows(test);
-	data train = d;
-	//   data *split = split_data(d, 1, 10);
-	//   data train = split[0];
-	//   data test = split[1];
-	matrix prediction = make_matrix(test.y.rows, test.y.cols);
-	int n = 30;
-	for(i = 0; i < n; ++i){
-		int count = 0;
-		float lr = .0005;
-		float momentum = .9;
-		float decay = .01;
-		network net = parse_network_cfg("nist.cfg");
-		while(++count <= 15){
-			float acc = train_network_sgd(net, train, train.X.rows, lr, momentum, decay);
-			printf("Training Accuracy: %lf Learning Rate: %f Momentum: %f Decay: %f\n", acc, lr, momentum, decay );
-			lr /= 2; 
-		}
-		matrix partial = network_predict_data(net, test);
-		float acc = matrix_accuracy(test.y, partial);
-		printf("Model Accuracy: %lf\n", acc);
-		matrix_add_matrix(partial, prediction);
-		acc = matrix_accuracy(test.y, prediction);
-		printf("Current Ensemble Accuracy: %lf\n", acc);
-		free_matrix(partial);
-	}
-	float acc = matrix_accuracy(test.y, prediction);
-	printf("Full Ensemble Accuracy: %lf\n", acc);
+    int i;
+    srand(888888);
+    data d = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
+    normalize_data_rows(d);
+    data test = load_categorical_data_csv("mnist/mnist_test.csv", 0,10);
+    normalize_data_rows(test);
+    data train = d;
+    //   data *split = split_data(d, 1, 10);
+    //   data train = split[0];
+    //   data test = split[1];
+    matrix prediction = make_matrix(test.y.rows, test.y.cols); // running sum of every model's predictions on the test set
+    int n = 30;
+    for(i = 0; i < n; ++i){
+        int count = 0;
+        float lr = .0005; // NOTE(review): lr, momentum and decay are now only printed; train_network_sgd below no longer receives them
+        float momentum = .9;
+        float decay = .01;
+        network net = parse_network_cfg("nist.cfg"); // a fresh network is parsed for each ensemble member
+        while(++count <= 15){
+            float acc = train_network_sgd(net, train, train.X.rows);
+            printf("Training Accuracy: %lf Learning Rate: %f Momentum: %f Decay: %f\n", acc, lr, momentum, decay );
+            lr /= 2; // NOTE(review): changes only the value printed on the next pass, not the training call
+        }
+        matrix partial = network_predict_data(net, test);
+        float acc = matrix_accuracy(test.y, partial);
+        printf("Model Accuracy: %lf\n", acc);
+        matrix_add_matrix(partial, prediction);
+        acc = matrix_accuracy(test.y, prediction);
+        printf("Current Ensemble Accuracy: %lf\n", acc);
+        free_matrix(partial);
+    }
+    float acc = matrix_accuracy(test.y, prediction);
+    printf("Full Ensemble Accuracy: %lf\n", acc);
 }
 
 void test_random_classify()
 {
-	network net = parse_network_cfg("connected.cfg");
-	matrix m = csv_to_matrix("train.csv");
-	//matrix ho = hold_out_matrix(&m, 2500);
-	float *truth = pop_column(&m, 0);
-	//float *ho_truth = pop_column(&ho, 0);
-	int i;
-	clock_t start = clock(), end;
-	int count = 0;
-	while(++count <= 300){
-		for(i = 0; i < m.rows; ++i){
-			int index = rand()%m.rows;
-			//image p = float_to_image(1690,1,1,m.vals[index]);
-			//normalize_image(p);
-			forward_network(net, m.vals[index], 1);
-			float *out = get_network_output(net);
-			float *delta = get_network_delta(net);
-			//printf("%f\n", out[0]);
-			delta[0] = truth[index] - out[0];
-			// printf("%f\n", delta[0]);
-			//printf("%f %f\n", truth[index], out[0]);
-			//backward_network(net, m.vals[index], );
-			update_network(net, .00001, 0,0);
-		}
-		//float test_acc = error_network(net, m, truth);
-		//float valid_acc = error_network(net, ho, ho_truth);
-		//printf("%f, %f\n", test_acc, valid_acc);
-		//fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
-		//if(valid_acc > .70) break;
-	}
-	end = clock();
-	FILE *fp = fopen("submission/out.txt", "w");
-	matrix test = csv_to_matrix("test.csv");
-	truth = pop_column(&test, 0);
-	for(i = 0; i < test.rows; ++i){
-		forward_network(net, test.vals[i], 0);
-		float *out = get_network_output(net);
-		if(fabs(out[0]) < .5) fprintf(fp, "0\n");
-		else fprintf(fp, "1\n");
-	}
-	fclose(fp);
-	printf("Neural Net Learning: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC);
+    network net = parse_network_cfg("connected.cfg");
+    matrix m = csv_to_matrix("train.csv");
+    //matrix ho = hold_out_matrix(&m, 2500);
+    float *truth = pop_column(&m, 0); // column 0 is taken out of m and used as the target below
+    //float *ho_truth = pop_column(&ho, 0);
+    int i;
+    clock_t start = clock(), end;
+    int count = 0;
+    while(++count <= 300){
+        for(i = 0; i < m.rows; ++i){
+            int index = rand()%m.rows; // row picked at random on every step
+            //image p = float_to_image(1690,1,1,m.vals[index]);
+            //normalize_image(p);
+            forward_network(net, m.vals[index], 1);
+            float *out = get_network_output(net);
+            float *delta = get_network_delta(net);
+            //printf("%f\n", out[0]);
+            delta[0] = truth[index] - out[0];
+            // printf("%f\n", delta[0]);
+            //printf("%f %f\n", truth[index], out[0]);
+            //backward_network(net, m.vals[index], );
+            update_network(net); // NOTE(review): backward_network is commented out just above, so this update follows a forward pass only
+        }
+        //float test_acc = error_network(net, m, truth);
+        //float valid_acc = error_network(net, ho, ho_truth);
+        //printf("%f, %f\n", test_acc, valid_acc);
+        //fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
+        //if(valid_acc > .70) break;
+    }
+    end = clock();
+    FILE *fp = fopen("submission/out.txt", "w"); // NOTE(review): the result is not checked before fprintf/fclose use it
+    matrix test = csv_to_matrix("test.csv");
+    truth = pop_column(&test, 0); // the popped column is not read again in this function
+    for(i = 0; i < test.rows; ++i){
+        forward_network(net, test.vals[i], 0);
+        float *out = get_network_output(net);
+        if(fabs(out[0]) < .5) fprintf(fp, "0\n");
+        else fprintf(fp, "1\n");
+    }
+    fclose(fp);
+    printf("Neural Net Learning: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC);
 }
 
 void test_split()
 {
-	data train = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
-	data *split = split_data(train, 0, 13);
-	printf("%d, %d, %d\n", train.X.rows, split[0].X.rows, split[1].X.rows);
+    data train = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
+    data *split = split_data(train, 0, 13);
+    printf("%d, %d, %d\n", train.X.rows, split[0].X.rows, split[1].X.rows);
 }
 
 void test_im2row()
 {
-	int h = 20;
-	int w = 20;
-	int c = 3;
-	int stride = 1;
-	int size = 11;
-	image test = make_random_image(h,w,c);
-	int mc = 1;
-	int mw = ((h-size)/stride+1)*((w-size)/stride+1);
-	int mh = (size*size*c);
-	int msize = mc*mw*mh;
-	float *matrix = calloc(msize, sizeof(float));
-	int i;
-	for(i = 0; i < 1000; ++i){
-		im2col_cpu(test.data,  c,  h,  w,  size,  stride, 0, matrix);
-		//image render = float_to_image(mh, mw, mc, matrix);
-	}
+    int h = 20;
+    int w = 20;
+    int c = 3;
+    int stride = 1;
+    int size = 11;
+    image test = make_random_image(h,w,c);
+    int mc = 1;
+    int mw = ((h-size)/stride+1)*((w-size)/stride+1);
+    int mh = (size*size*c);
+    int msize = mc*mw*mh;
+    float *matrix = calloc(msize, sizeof(float));
+    int i;
+    for(i = 0; i < 1000; ++i){
+        im2col_cpu(test.data,1,  c,  h,  w,  size,  stride, 0, matrix);
+        //image render = float_to_image(mh, mw, mc, matrix);
+    }
 }
 
 void flip_network()
 {
-	network net = parse_network_cfg("cfg/voc_imagenet_orig.cfg");
-	save_network(net, "cfg/voc_imagenet_rev.cfg");
+    network net = parse_network_cfg("cfg/voc_imagenet_orig.cfg");
+    save_network(net, "cfg/voc_imagenet_rev.cfg");
 }
 
 void tune_VOC()
 {
-	network net = parse_network_cfg("cfg/voc_start.cfg");
-	srand(2222222);
-	int i = 20;
-	char *labels[] = {"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable","dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"};
-	float lr = .000005;
-	float momentum = .9;
-	float decay = 0.0001;
-	while(i++ < 1000 || 1){
-		data train = load_data_image_pathfile_random("/home/pjreddie/VOC2012/trainval_paths.txt", 10, labels, 20, 256, 256);
+    network net = parse_network_cfg("cfg/voc_start.cfg");
+    srand(2222222);
+    int i = 20;
+    char *labels[] = {"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable","dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"};
+    float lr = .000005; // NOTE(review): lr, momentum and decay are now only printed; train_network_sgd below no longer receives them
+    float momentum = .9;
+    float decay = 0.0001;
+    while(i++ < 1000 || 1){ // the trailing || 1 keeps this loop running regardless of i
+        data train = load_data_image_pathfile_random("/home/pjreddie/VOC2012/trainval_paths.txt", 10, labels, 20, 256, 256);
 
-		image im = float_to_image(256, 256, 3,train.X.vals[0]);
-		show_image(im, "input");
-		visualize_network(net);
-		cvWaitKey(100);
+        image im = float_to_image(256, 256, 3,train.X.vals[0]); // first row of the freshly loaded data, shown before training on it
+        show_image(im, "input");
+        visualize_network(net);
+        cvWaitKey(100);
 
-		translate_data_rows(train, -144);
-		clock_t start = clock(), end;
-		float loss = train_network_sgd(net, train, 10, lr, momentum, decay);
-		end = clock();
-		printf("%d: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", i, loss, (float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
-		free_data(train);
+        translate_data_rows(train, -144);
+        clock_t start = clock(), end;
+        float loss = train_network_sgd(net, train, 10);
+        end = clock();
+        printf("%d: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", i, loss, (float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
+        free_data(train); // data is loaded and released once per loop pass
         /*
-		if(i%10==0){
-			char buff[256];
-			sprintf(buff, "/home/pjreddie/voc_cfg/voc_ramp_%d.cfg", i);
-			save_network(net, buff);
-		}
-        */
-		//lr *= .99;
-	}
+           if(i%10==0){
+           char buff[256];
+           sprintf(buff, "/home/pjreddie/voc_cfg/voc_ramp_%d.cfg", i);
+           save_network(net, buff);
+           }
+         */
+        //lr *= .99;
+    }
 }
 
 int voc_size(int x)
 {
-	x = x-1+3;
-	x = x-1+3;
-	x = x-1+3;
-	x = (x-1)*2+1;
-	x = x-1+5;
-	x = (x-1)*2+1;
-	x = (x-1)*4+11;
-	return x;
+    x = x-1+3;
+    x = x-1+3;
+    x = x-1+3;
+    x = (x-1)*2+1;
+    x = x-1+5;
+    x = (x-1)*2+1;
+    x = (x-1)*4+11;
+    return x;
 }
 
 image features_output_size(network net, IplImage *src, int outh, int outw)
 {
-	int h = voc_size(outh);
-	int w = voc_size(outw);
-	fprintf(stderr, "%d %d\n", h, w);
+    int h = voc_size(outh);
+    int w = voc_size(outw);
+    fprintf(stderr, "%d %d\n", h, w);
 
-	IplImage *sized = cvCreateImage(cvSize(w,h), src->depth, src->nChannels);
-	cvResize(src, sized, CV_INTER_LINEAR);
-	image im = ipl_to_image(sized);
-	//normalize_array(im.data, im.h*im.w*im.c);
-	translate_image(im, -144);
-	resize_network(net, im.h, im.w, im.c);
-	forward_network(net, im.data, 0);
-	image out = get_network_image(net);
-	free_image(im);
-	cvReleaseImage(&sized);
-	return copy_image(out);
+    IplImage *sized = cvCreateImage(cvSize(w,h), src->depth, src->nChannels);
+    cvResize(src, sized, CV_INTER_LINEAR);
+    image im = ipl_to_image(sized);
+    //normalize_array(im.data, im.h*im.w*im.c);
+    translate_image(im, -144);
+    resize_network(net, im.h, im.w, im.c);
+    forward_network(net, im.data, 0);
+    image out = get_network_image(net);
+    free_image(im);
+    cvReleaseImage(&sized);
+    return copy_image(out);
 }
 
 void features_VOC_image_size(char *image_path, int h, int w)
 {
-	int j;
-	network net = parse_network_cfg("cfg/voc_imagenet.cfg");
-	fprintf(stderr, "%s\n", image_path);
+    int j;
+    network net = parse_network_cfg("cfg/voc_imagenet.cfg");
+    fprintf(stderr, "%s\n", image_path);
 
-	IplImage* src = 0;
-	if( (src = cvLoadImage(image_path,-1)) == 0 ) file_error(image_path);
-	image out = features_output_size(net, src, h, w);
-	for(j = 0; j < out.c*out.h*out.w; ++j){
-		if(j != 0) printf(",");
-		printf("%g", out.data[j]);
-	}
-	printf("\n");
-	free_image(out);
-	cvReleaseImage(&src);
+    IplImage* src = 0;
+    if( (src = cvLoadImage(image_path,-1)) == 0 ) file_error(image_path);
+    image out = features_output_size(net, src, h, w);
+    for(j = 0; j < out.c*out.h*out.w; ++j){
+        if(j != 0) printf(",");
+        printf("%g", out.data[j]);
+    }
+    printf("\n");
+    free_image(out);
+    cvReleaseImage(&src);
 }
 void visualize_imagenet_topk(char *filename)
 {
-	int i,j,k,l;
-	int topk = 10;
-	network net = parse_network_cfg("cfg/voc_imagenet.cfg");
-	list *plist = get_paths(filename);
-	node *n = plist->front;
-	int h = voc_size(1), w = voc_size(1);
-	int num = get_network_image(net).c;
-	image **vizs = calloc(num, sizeof(image*));
-	float **score = calloc(num, sizeof(float *));
-	for(i = 0; i < num; ++i){
-		vizs[i] = calloc(topk, sizeof(image));
-		for(j = 0; j < topk; ++j) vizs[i][j] = make_image(h,w,3);
-		score[i] = calloc(topk, sizeof(float));
-	}
+    int i,j,k,l;
+    int topk = 10;
+    network net = parse_network_cfg("cfg/voc_imagenet.cfg");
+    list *plist = get_paths(filename);
+    node *n = plist->front;
+    int h = voc_size(1), w = voc_size(1);
+    int num = get_network_image(net).c;
+    image **vizs = calloc(num, sizeof(image*));
+    float **score = calloc(num, sizeof(float *));
+    for(i = 0; i < num; ++i){
+        vizs[i] = calloc(topk, sizeof(image));
+        for(j = 0; j < topk; ++j) vizs[i][j] = make_image(h,w,3);
+        score[i] = calloc(topk, sizeof(float));
+    }
 
-	int count = 0;
-	while(n){
-		++count;
-		char *image_path = (char *)n->val;
-		image im = load_image(image_path, 0, 0);
-		n = n->next;
-		if(im.h < 200 || im.w < 200) continue;
-		printf("Processing %dx%d image\n", im.h, im.w);
-		resize_network(net, im.h, im.w, im.c);
-		//scale_image(im, 1./255);
-		translate_image(im, -144);
-		forward_network(net, im.data, 0);
-		image out = get_network_image(net);
+    int count = 0;
+    while(n){
+        ++count;
+        char *image_path = (char *)n->val;
+        image im = load_image(image_path, 0, 0);
+        n = n->next;
+        if(im.h < 200 || im.w < 200) continue;
+        printf("Processing %dx%d image\n", im.h, im.w);
+        resize_network(net, im.h, im.w, im.c);
+        //scale_image(im, 1./255);
+        translate_image(im, -144);
+        forward_network(net, im.data, 0);
+        image out = get_network_image(net);
 
-		int dh = (im.h - h)/(out.h-1);
-		int dw = (im.w - w)/(out.w-1);
-		//printf("%d %d\n", dh, dw);
-		for(k = 0; k < out.c; ++k){
-			float topv = 0;
-			int topi = -1;
-			int topj = -1;
-			for(i = 0; i < out.h; ++i){
-				for(j = 0; j < out.w; ++j){
-					float val = get_pixel(out, i, j, k);
-					if(val > topv){
-						topv = val;
-						topi = i;
-						topj = j;
-					}
-				}
-			}
-			if(topv){
-				image sub = get_sub_image(im, dh*topi, dw*topj, h, w);
-				for(l = 0; l < topk; ++l){
-					if(topv > score[k][l]){
-						float swap = score[k][l];
-						score[k][l] = topv;
-						topv = swap;
+        int dh = (im.h - h)/(out.h-1);
+        int dw = (im.w - w)/(out.w-1);
+        //printf("%d %d\n", dh, dw);
+        for(k = 0; k < out.c; ++k){
+            float topv = 0;
+            int topi = -1;
+            int topj = -1;
+            for(i = 0; i < out.h; ++i){
+                for(j = 0; j < out.w; ++j){
+                    float val = get_pixel(out, i, j, k);
+                    if(val > topv){
+                        topv = val;
+                        topi = i;
+                        topj = j;
+                    }
+                }
+            }
+            if(topv){
+                image sub = get_sub_image(im, dh*topi, dw*topj, h, w);
+                for(l = 0; l < topk; ++l){
+                    if(topv > score[k][l]){
+                        float swap = score[k][l];
+                        score[k][l] = topv;
+                        topv = swap;
 
-						image swapi = vizs[k][l];
-						vizs[k][l] = sub;
-						sub = swapi;
-					}
-				}
-				free_image(sub);
-			}
-		}
-		free_image(im);
-		if(count%50 == 0){
-			image grid = grid_images(vizs, num, topk);
-			//show_image(grid, "IMAGENET Visualization");
-			save_image(grid, "IMAGENET Grid Single Nonorm");
-			free_image(grid);
-		}
-	}
-	//cvWaitKey(0);
+                        image swapi = vizs[k][l];
+                        vizs[k][l] = sub;
+                        sub = swapi;
+                    }
+                }
+                free_image(sub);
+            }
+        }
+        free_image(im);
+        if(count%50 == 0){
+            image grid = grid_images(vizs, num, topk);
+            //show_image(grid, "IMAGENET Visualization");
+            save_image(grid, "IMAGENET Grid Single Nonorm");
+            free_image(grid);
+        }
+    }
+    //cvWaitKey(0);
 }
 
 void visualize_imagenet_features(char *filename)
 {
-	int i,j,k;
-	network net = parse_network_cfg("cfg/voc_imagenet.cfg");
-	list *plist = get_paths(filename);
-	node *n = plist->front;
-	int h = voc_size(1), w = voc_size(1);
-	int num = get_network_image(net).c;
-	image *vizs = calloc(num, sizeof(image));
-	for(i = 0; i < num; ++i) vizs[i] = make_image(h, w, 3);
-	while(n){
-		char *image_path = (char *)n->val;
-		image im = load_image(image_path, 0, 0);
-		printf("Processing %dx%d image\n", im.h, im.w);
-		resize_network(net, im.h, im.w, im.c);
-		forward_network(net, im.data, 0);
-		image out = get_network_image(net);
+    int i,j,k;
+    network net = parse_network_cfg("cfg/voc_imagenet.cfg");
+    list *plist = get_paths(filename);
+    node *n = plist->front;
+    int h = voc_size(1), w = voc_size(1);
+    int num = get_network_image(net).c;
+    image *vizs = calloc(num, sizeof(image));
+    for(i = 0; i < num; ++i) vizs[i] = make_image(h, w, 3);
+    while(n){
+        char *image_path = (char *)n->val;
+        image im = load_image(image_path, 0, 0);
+        printf("Processing %dx%d image\n", im.h, im.w);
+        resize_network(net, im.h, im.w, im.c);
+        forward_network(net, im.data, 0);
+        image out = get_network_image(net);
 
-		int dh = (im.h - h)/h;
-		int dw = (im.w - w)/w;
-		for(i = 0; i < out.h; ++i){
-			for(j = 0; j < out.w; ++j){
-				image sub = get_sub_image(im, dh*i, dw*j, h, w);
-				for(k = 0; k < out.c; ++k){
-					float val = get_pixel(out, i, j, k);
-					//printf("%f, ", val);
-					image sub_c = copy_image(sub);
-					scale_image(sub_c, val);
-					add_into_image(sub_c, vizs[k], 0, 0);
-					free_image(sub_c);
-				}
-				free_image(sub);
-			}
-		}
-		//printf("\n");
-		show_images(vizs, 10, "IMAGENET Visualization");
-		cvWaitKey(1000);
-		n = n->next;
-	}
-	cvWaitKey(0);
+        int dh = (im.h - h)/h;
+        int dw = (im.w - w)/w;
+        for(i = 0; i < out.h; ++i){
+            for(j = 0; j < out.w; ++j){
+                image sub = get_sub_image(im, dh*i, dw*j, h, w);
+                for(k = 0; k < out.c; ++k){
+                    float val = get_pixel(out, i, j, k);
+                    //printf("%f, ", val);
+                    image sub_c = copy_image(sub);
+                    scale_image(sub_c, val);
+                    add_into_image(sub_c, vizs[k], 0, 0);
+                    free_image(sub_c);
+                }
+                free_image(sub);
+            }
+        }
+        //printf("\n");
+        show_images(vizs, 10, "IMAGENET Visualization");
+        cvWaitKey(1000);
+        n = n->next;
+    }
+    cvWaitKey(0);
 }
 
 void visualize_cat()
 {
-	network net = parse_network_cfg("cfg/voc_imagenet.cfg");
-	image im = load_image("data/cat.png", 0, 0);
-	printf("Processing %dx%d image\n", im.h, im.w);
-	resize_network(net, im.h, im.w, im.c);
-	forward_network(net, im.data, 0);
+    network net = parse_network_cfg("cfg/voc_imagenet.cfg");
+    image im = load_image("data/cat.png", 0, 0);
+    printf("Processing %dx%d image\n", im.h, im.w);
+    resize_network(net, im.h, im.w, im.c);
+    forward_network(net, im.data, 0);
 
-	visualize_network(net);
-	cvWaitKey(0);
+    visualize_network(net);
+    cvWaitKey(0);
 }
 
 void features_VOC_image(char *image_file, char *image_dir, char *out_dir, int flip, int interval)
 {
-	int i,j;
-	network net = parse_network_cfg("cfg/voc_imagenet.cfg");
-	char image_path[1024];
-	sprintf(image_path, "%s/%s",image_dir, image_file);
-	char out_path[1024];
-	if (flip)sprintf(out_path, "%s%d/%s_r.txt",out_dir, interval, image_file);
-	else sprintf(out_path, "%s%d/%s.txt",out_dir, interval, image_file);
-	printf("%s\n", image_file);
+    int i,j;
+    network net = parse_network_cfg("cfg/voc_imagenet.cfg");
+    char image_path[1024];
+    sprintf(image_path, "%s/%s",image_dir, image_file);
+    char out_path[1024];
+    if (flip)sprintf(out_path, "%s%d/%s_r.txt",out_dir, interval, image_file);
+    else sprintf(out_path, "%s%d/%s.txt",out_dir, interval, image_file);
+    printf("%s\n", image_file);
 
-	IplImage* src = 0;
-	if( (src = cvLoadImage(image_path,-1)) == 0 ) file_error(image_path);
-	if(flip)cvFlip(src, 0, 1);
-	int w = src->width;
-	int h = src->height;
-	int sbin = 8;
-	double scale = pow(2., 1./interval);
-	int m = (w<h)?w:h;
-	int max_scale = 1+floor((double)log((double)m/(5.*sbin))/log(scale));
-	if(max_scale < interval) error("max_scale must be >= interval");
-	image *ims = calloc(max_scale+interval, sizeof(image));
+    IplImage* src = 0;
+    if( (src = cvLoadImage(image_path,-1)) == 0 ) file_error(image_path);
+    if(flip)cvFlip(src, 0, 1);
+    int w = src->width;
+    int h = src->height;
+    int sbin = 8;
+    double scale = pow(2., 1./interval);
+    int m = (w<h)?w:h;
+    int max_scale = 1+floor((double)log((double)m/(5.*sbin))/log(scale));
+    if(max_scale < interval) error("max_scale must be >= interval");
+    image *ims = calloc(max_scale+interval, sizeof(image));
 
-	for(i = 0; i < interval; ++i){
-		double factor = 1./pow(scale, i);
-		double ih =  round(h*factor);
-		double iw =  round(w*factor);
-		int ex_h = round(ih/4.) - 2;
-		int ex_w = round(iw/4.) - 2;
-		ims[i] = features_output_size(net, src, ex_h, ex_w);
+    for(i = 0; i < interval; ++i){
+        double factor = 1./pow(scale, i);
+        double ih =  round(h*factor);
+        double iw =  round(w*factor);
+        int ex_h = round(ih/4.) - 2;
+        int ex_w = round(iw/4.) - 2;
+        ims[i] = features_output_size(net, src, ex_h, ex_w);
 
-		ih =  round(h*factor);
-		iw =  round(w*factor);
-		ex_h = round(ih/8.) - 2;
-		ex_w = round(iw/8.) - 2;
-		ims[i+interval] = features_output_size(net, src, ex_h, ex_w);
-		for(j = i+interval; j < max_scale; j += interval){
-			factor /= 2.;
-			ih =  round(h*factor);
-			iw =  round(w*factor);
-			ex_h = round(ih/8.) - 2;
-			ex_w = round(iw/8.) - 2;
-			ims[j+interval] = features_output_size(net, src, ex_h, ex_w);
-		}
-	}
-	FILE *fp = fopen(out_path, "w");
-	if(fp == 0) file_error(out_path);
-	for(i = 0; i < max_scale+interval; ++i){
-		image out = ims[i];
-		fprintf(fp, "%d, %d, %d\n",out.c, out.h, out.w);
-		for(j = 0; j < out.c*out.h*out.w; ++j){
-			if(j != 0)fprintf(fp, ",");
-			float o = out.data[j];
-			if(o < 0) o = 0;
-			fprintf(fp, "%g", o);
-		}
-		fprintf(fp, "\n");
-		free_image(out);
-	}
-	free(ims);
-	fclose(fp);
-	cvReleaseImage(&src);
+        ih =  round(h*factor);
+        iw =  round(w*factor);
+        ex_h = round(ih/8.) - 2;
+        ex_w = round(iw/8.) - 2;
+        ims[i+interval] = features_output_size(net, src, ex_h, ex_w);
+        for(j = i+interval; j < max_scale; j += interval){
+            factor /= 2.;
+            ih =  round(h*factor);
+            iw =  round(w*factor);
+            ex_h = round(ih/8.) - 2;
+            ex_w = round(iw/8.) - 2;
+            ims[j+interval] = features_output_size(net, src, ex_h, ex_w);
+        }
+    }
+    FILE *fp = fopen(out_path, "w");
+    if(fp == 0) file_error(out_path);
+    for(i = 0; i < max_scale+interval; ++i){
+        image out = ims[i];
+        fprintf(fp, "%d, %d, %d\n",out.c, out.h, out.w);
+        for(j = 0; j < out.c*out.h*out.w; ++j){
+            if(j != 0)fprintf(fp, ",");
+            float o = out.data[j];
+            if(o < 0) o = 0;
+            fprintf(fp, "%g", o);
+        }
+        fprintf(fp, "\n");
+        free_image(out);
+    }
+    free(ims);
+    fclose(fp);
+    cvReleaseImage(&src);
 }
 
 void test_distribution()
 {
-	IplImage* img = 0;
-	if( (img = cvLoadImage("im_small.jpg",-1)) == 0 ) file_error("im_small.jpg");
-	network net = parse_network_cfg("cfg/voc_features.cfg");
-	int h = img->height/8-2;
-	int w = img->width/8-2;
-	image out = features_output_size(net, img, h, w);
-	int c = out.c;
-	out.c = 1;
-	show_image(out, "output");
-	out.c = c;
-	image input = ipl_to_image(img);
-	show_image(input, "input");
-	CvScalar s;
-	int i,j;
-	image affects = make_image(input.h, input.w, 1);
-	int count = 0;
-	for(i = 0; i<img->height; i += 1){
-		for(j = 0; j < img->width; j += 1){
-			IplImage *copy = cvCloneImage(img);
-			s=cvGet2D(copy,i,j); // get the (i,j) pixel value
-			printf("%d/%d\n", count++, img->height*img->width);
-			s.val[0]=0;
-			s.val[1]=0;
-			s.val[2]=0;
-			cvSet2D(copy,i,j,s); // set the (i,j) pixel value
-			image mod = features_output_size(net, copy, h, w);
-			image dist = image_distance(out, mod);
-			show_image(affects, "affects");
-			cvWaitKey(1);
-			cvReleaseImage(&copy);
-			//affects.data[i*affects.w + j] += dist.data[3*dist.w+5];
-			affects.data[i*affects.w + j] += dist.data[1*dist.w+1];
-			free_image(mod);
-			free_image(dist);
-		}
-	}
-	show_image(affects, "Origins");
-	cvWaitKey(0);
-	cvWaitKey(0);
+    IplImage* img = 0;
+    if( (img = cvLoadImage("im_small.jpg",-1)) == 0 ) file_error("im_small.jpg");
+    network net = parse_network_cfg("cfg/voc_features.cfg");
+    int h = img->height/8-2;
+    int w = img->width/8-2;
+    image out = features_output_size(net, img, h, w);
+    int c = out.c;
+    out.c = 1;
+    show_image(out, "output");
+    out.c = c;
+    image input = ipl_to_image(img);
+    show_image(input, "input");
+    CvScalar s;
+    int i,j;
+    image affects = make_image(input.h, input.w, 1);
+    int count = 0;
+    for(i = 0; i<img->height; i += 1){
+        for(j = 0; j < img->width; j += 1){
+            IplImage *copy = cvCloneImage(img);
+            s=cvGet2D(copy,i,j); // get the (i,j) pixel value
+            printf("%d/%d\n", count++, img->height*img->width);
+            s.val[0]=0;
+            s.val[1]=0;
+            s.val[2]=0;
+            cvSet2D(copy,i,j,s); // set the (i,j) pixel value
+            image mod = features_output_size(net, copy, h, w);
+            image dist = image_distance(out, mod);
+            show_image(affects, "affects");
+            cvWaitKey(1);
+            cvReleaseImage(&copy);
+            //affects.data[i*affects.w + j] += dist.data[3*dist.w+5];
+            affects.data[i*affects.w + j] += dist.data[1*dist.w+1];
+            free_image(mod);
+            free_image(dist);
+        }
+    }
+    show_image(affects, "Origins");
+    cvWaitKey(0);
+    cvWaitKey(0);
 }
 
 
 int main(int argc, char *argv[])
 {
-	//train_full();
-	//test_distribution();
-	//feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+    //train_full();
+    //test_distribution();
+    //feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
 
-	//test_blas();
-	//test_visualize();
-	//test_gpu_blas();
-	//test_blas();
-	//test_convolve_matrix();
-	//    test_im2row();
-	//test_split();
-	//test_ensemble();
-	test_nist();
-	//test_cifar10();
-	//test_vince();
-	//test_full();
-	//tune_VOC();
-	//features_VOC_image(argv[1], argv[2], argv[3], 0);
-	//features_VOC_image(argv[1], argv[2], argv[3], 1);
-	//train_VOC();
-	//features_VOC_image(argv[1], argv[2], argv[3], 0, 4);
-	//features_VOC_image(argv[1], argv[2], argv[3], 1, 4);
-	//features_VOC_image_size(argv[1], atoi(argv[2]), atoi(argv[3]));
-	//visualize_imagenet_features("data/assira/train.list");
-	//visualize_imagenet_topk("data/VOC2012.list");
-	//visualize_cat();
-	//flip_network();
-	//test_visualize();
-	fprintf(stderr, "Success!\n");
-	//test_random_preprocess();
-	//test_random_classify();
-	//test_parser();
-	//test_backpropagate();
-	//test_ann();
-	//test_convolve();
-	//test_upsample();
-	//test_rotate();
-	//test_load();
-	//test_network();
-	//test_convolutional_layer();
-	//verify_convolutional_layer();
-	//test_color();
-	//cvWaitKey(0);
-	return 0;
+    //test_blas();
+    //test_visualize();
+    //test_gpu_blas();
+    //test_blas();
+    //test_convolve_matrix();
+    //    test_im2row();
+    //test_split();
+    //test_ensemble();
+    //test_nist_single();
+    test_nist();
+    //test_cifar10();
+    //test_vince();
+    //test_full();
+    //tune_VOC();
+    //features_VOC_image(argv[1], argv[2], argv[3], 0);
+    //features_VOC_image(argv[1], argv[2], argv[3], 1);
+    //train_VOC();
+    //features_VOC_image(argv[1], argv[2], argv[3], 0, 4);
+    //features_VOC_image(argv[1], argv[2], argv[3], 1, 4);
+    //features_VOC_image_size(argv[1], atoi(argv[2]), atoi(argv[3]));
+    //visualize_imagenet_features("data/assira/train.list");
+    //visualize_imagenet_topk("data/VOC2012.list");
+    //visualize_cat();
+    //flip_network();
+    //test_visualize();
+    //test_parser();
+    fprintf(stderr, "Success!\n");
+    //test_random_preprocess();
+    //test_random_classify();
+    //test_parser();
+    //test_backpropagate();
+    //test_ann();
+    //test_convolve();
+    //test_upsample();
+    //test_rotate();
+    //test_load();
+    //test_network();
+    //test_convolutional_layer();
+    //verify_convolutional_layer();
+    //test_color();
+    //cvWaitKey(0);
+    return 0;
 }
diff --git a/src/col2im.c b/src/col2im.c
index bc15b7bd..fd7de4fa 100644
--- a/src/col2im.c
+++ b/src/col2im.c
@@ -1,4 +1,6 @@
-inline void col2im_set_pixel(float *im, int height, int width, int channels,
+#include <stdio.h>
+#include <math.h>
+inline void col2im_add_pixel(float *im, int height, int width, int channels,
                         int row, int col, int channel, int pad, float val)
 {
     row -= pad;
@@ -6,7 +8,7 @@ inline void col2im_set_pixel(float *im, int height, int width, int channels,
 
     if (row < 0 || col < 0 ||
         row >= height || col >= width) return;
-    im[col + width*(row + channel*height)] = val;
+    im[col + width*(row + channel*height)] += val;
 }
 //This one might be too, can't remember.
 void col2im_cpu(float* data_col,
@@ -31,7 +33,7 @@ void col2im_cpu(float* data_col,
                 int im_row = h_offset + h * stride;
                 int im_col = w_offset + w * stride;
                 double val = data_col[(c * height_col + h) * width_col + w];
-                col2im_set_pixel(data_im, height, width, channels,
+                col2im_add_pixel(data_im, height, width, channels,
                         im_row, im_col, c_im, pad, val);
             }
         }
diff --git a/src/connected_layer.c b/src/connected_layer.c
index bebf2d9d..368fb63c 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -7,15 +7,19 @@
 #include <stdlib.h>
 #include <string.h>
 
-connected_layer *make_connected_layer(int batch, int inputs, int outputs, float dropout, ACTIVATION activation)
+connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay)
 {
     fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
     int i;
     connected_layer *layer = calloc(1, sizeof(connected_layer));
+
+    layer->learning_rate = learning_rate;
+    layer->momentum = momentum;
+    layer->decay = decay;
+
     layer->inputs = inputs;
     layer->outputs = outputs;
     layer->batch=batch;
-    layer->dropout = dropout;
 
     layer->output = calloc(batch*outputs, sizeof(float*));
     layer->delta = calloc(batch*outputs, sizeof(float*));
@@ -25,8 +29,9 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, float
     layer->weight_momentum = calloc(inputs*outputs, sizeof(float));
     layer->weights = calloc(inputs*outputs, sizeof(float));
     float scale = 1./inputs;
+    //scale = .01;
     for(i = 0; i < inputs*outputs; ++i)
-        layer->weights[i] = scale*(rand_uniform());
+        layer->weights[i] = scale*(rand_uniform()-.5);
 
     layer->bias_updates = calloc(outputs, sizeof(float));
     layer->bias_adapt = calloc(outputs, sizeof(float));
@@ -40,25 +45,24 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, float
     return layer;
 }
 
-void update_connected_layer(connected_layer layer, float step, float momentum, float decay)
+void update_connected_layer(connected_layer layer)
 {
     int i;
     for(i = 0; i < layer.outputs; ++i){
-        layer.bias_momentum[i] = step*(layer.bias_updates[i]) + momentum*layer.bias_momentum[i];
+        layer.bias_momentum[i] = layer.learning_rate*(layer.bias_updates[i]) + layer.momentum*layer.bias_momentum[i];
         layer.biases[i] += layer.bias_momentum[i];
     }
     for(i = 0; i < layer.outputs*layer.inputs; ++i){
-        layer.weight_momentum[i] = step*(layer.weight_updates[i] - decay*layer.weights[i]) + momentum*layer.weight_momentum[i];
+        layer.weight_momentum[i] = layer.learning_rate*(layer.weight_updates[i] - layer.decay*layer.weights[i]) + layer.momentum*layer.weight_momentum[i];
         layer.weights[i] += layer.weight_momentum[i];
     }
     memset(layer.bias_updates, 0, layer.outputs*sizeof(float));
     memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(float));
 }
 
-void forward_connected_layer(connected_layer layer, float *input, int train)
+void forward_connected_layer(connected_layer layer, float *input)
 {
     int i;
-    if(!train) layer.dropout = 0;
     for(i = 0; i < layer.batch; ++i){
         memcpy(layer.output+i*layer.outputs, layer.biases, layer.outputs*sizeof(float));
     }
@@ -69,7 +73,7 @@ void forward_connected_layer(connected_layer layer, float *input, int train)
     float *b = layer.weights;
     float *c = layer.output;
     gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
-    activate_array(layer.output, layer.outputs*layer.batch, layer.activation, layer.dropout);
+    activate_array(layer.output, layer.outputs*layer.batch, layer.activation);
 }
 
 void backward_connected_layer(connected_layer layer, float *input, float *delta)
diff --git a/src/connected_layer.h b/src/connected_layer.h
index ff5a0ce4..e9e461c5 100644
--- a/src/connected_layer.h
+++ b/src/connected_layer.h
@@ -4,6 +4,10 @@
 #include "activations.h"
 
 typedef struct{
+    float learning_rate;
+    float momentum;
+    float decay;
+
     int batch;
     int inputs;
     int outputs;
@@ -22,17 +26,15 @@ typedef struct{
     float *output;
     float *delta;
     
-    float dropout;
-
     ACTIVATION activation;
 
 } connected_layer;
 
-connected_layer *make_connected_layer(int batch, int inputs, int outputs, float dropout, ACTIVATION activation);
+connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay);
 
-void forward_connected_layer(connected_layer layer, float *input, int train);
+void forward_connected_layer(connected_layer layer, float *input);
 void backward_connected_layer(connected_layer layer, float *input, float *delta);
-void update_connected_layer(connected_layer layer, float step, float momentum, float decay);
+void update_connected_layer(connected_layer layer);
 
 
 #endif
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 44e92442..6c7f9470 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -37,11 +37,16 @@ image get_convolutional_delta(convolutional_layer layer)
     return float_to_image(h,w,c,layer.delta);
 }
 
-convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
+convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay)
 {
     int i;
     size = 2*(size/2)+1; //HA! And you thought you'd use an even sized filter...
     convolutional_layer *layer = calloc(1, sizeof(convolutional_layer));
+
+    layer->learning_rate = learning_rate;
+    layer->momentum = momentum;
+    layer->decay = decay;
+
     layer->h = h;
     layer->w = w;
     layer->c = c;
@@ -59,7 +64,8 @@ convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, in
     layer->bias_updates = calloc(n, sizeof(float));
     layer->bias_momentum = calloc(n, sizeof(float));
     float scale = 1./(size*size*c);
-    for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = scale*(rand_uniform());
+    //scale = .0001;
+    for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = scale*(rand_uniform()-.5);
     for(i = 0; i < n; ++i){
         //layer->biases[i] = rand_normal()*scale + scale;
         layer->biases[i] = .5;
@@ -79,7 +85,7 @@ convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, in
     layer->bias_updates_cl = cl_make_array(layer->bias_updates, n);
     layer->bias_momentum_cl = cl_make_array(layer->bias_momentum, n);
 
-    layer->col_image_cl = cl_make_array(layer->col_image, layer.batch*out_h*out_w*size*size*c);
+    layer->col_image_cl = cl_make_array(layer->col_image, layer->batch*out_h*out_w*size*size*c);
     layer->delta_cl = cl_make_array(layer->delta, layer->batch*out_h*out_w*n);
     layer->output_cl = cl_make_array(layer->output, layer->batch*out_h*out_w*n);
     #endif
@@ -136,9 +142,10 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
     float *b = layer.col_image;
     float *c = layer.output;
 
+    im2col_cpu(in, layer.batch, layer.c, layer.h, layer.w, 
+        layer.size, layer.stride, layer.pad, b);
+
     for(i = 0; i < layer.batch; ++i){
-        im2col_cpu(in, layer.c, layer.h, layer.w, 
-            layer.size, layer.stride, layer.pad, b);
         gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
         c += n*m;
         in += layer.h*layer.w*layer.c;
@@ -149,29 +156,9 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
     for(i = 0; i < m*n; ++i) printf("%f, ", layer.output[i]);
     printf("\n");
     */
-    activate_array(layer.output, m*n*layer.batch, layer.activation, 0.);
+    activate_array(layer.output, m*n*layer.batch, layer.activation);
 }
 
-#ifdef GPU
-void forward_convolutional_layer_gpu(convolutional_layer layer, cl_mem in)
-{
-    int m = layer.n;
-    int k = layer.size*layer.size*layer.c;
-    int n = convolutional_out_height(layer)*
-        convolutional_out_width(layer)*
-        layer.batch;
-
-    cl_write_array(layer.filters_cl, layer.filters, m*k);
-    cl_mem a = layer.filters_cl;
-    cl_mem b = layer.col_image_cl;
-    cl_mem c = layer.output_cl;
-    im2col_ongpu(in, layer.batch, layer.c,  layer.h,  layer.w,  layer.size,  layer.stride, b);
-    gemm_ongpu(0,0,m,n,k,1,a,k,b,n,0,c,n);
-    activate_array_ongpu(layer.output_cl, m*n, layer.activation, 0.);
-    cl_read_array(layer.output_cl, layer.output, m*n);
-}
-#endif
-
 void learn_bias_convolutional_layer(convolutional_layer layer)
 {
     int i,b;
@@ -225,15 +212,15 @@ void backward_convolutional_layer(convolutional_layer layer, float *delta)
     }
 }
 
-void update_convolutional_layer(convolutional_layer layer, float step, float momentum, float decay)
+void update_convolutional_layer(convolutional_layer layer)
 {
     int size = layer.size*layer.size*layer.c*layer.n;
-    axpy_cpu(layer.n, step, layer.bias_updates, 1, layer.biases, 1);
-    scal_cpu(layer.n, momentum, layer.bias_updates, 1);
+    axpy_cpu(layer.n, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1);
+    scal_cpu(layer.n,layer.momentum, layer.bias_updates, 1);
 
-    scal_cpu(size, 1.-step*decay, layer.filters, 1);
-    axpy_cpu(size, step, layer.filter_updates, 1, layer.filters, 1);
-    scal_cpu(size, momentum, layer.filter_updates, 1);
+    scal_cpu(size, 1.-layer.learning_rate*layer.decay, layer.filters, 1);
+    axpy_cpu(size, layer.learning_rate, layer.filter_updates, 1, layer.filters, 1);
+    scal_cpu(size, layer.momentum, layer.filter_updates, 1);
 }
 
 
@@ -284,9 +271,29 @@ image *visualize_convolutional_layer(convolutional_layer layer, char *window, im
     image dc = collapse_image_layers(delta, 1);
     char buff[256];
     sprintf(buff, "%s: Output", window);
-    show_image(dc, buff);
-    save_image(dc, buff);
+    //show_image(dc, buff);
+    //save_image(dc, buff);
     free_image(dc);
     return single_filters;
 }
 
+#ifdef GPU
+void forward_convolutional_layer_gpu(convolutional_layer layer, cl_mem in)
+{   // OpenCL forward pass: im2col -> gemm -> activation, then output_cl is read back into layer.output.
+    int m = layer.n;                        // row count of the filter matrix (m*k floats of layer.filters are written below)
+    int k = layer.size*layer.size*layer.c;  // length of one filter row: size*size*c
+    int n = convolutional_out_height(layer)*
+        convolutional_out_width(layer)*
+        layer.batch;                        // note: n already includes the batch dimension
+    // NOTE(review): the CPU path passes layer.pad to im2col_cpu and uses beta=1 in gemm; here pad is not passed and beta is 0 -- confirm intended.
+    cl_write_array(layer.filters_cl, layer.filters, m*k); // presumably copies the host filters into the filters_cl buffer
+    cl_mem a = layer.filters_cl;
+    cl_mem b = layer.col_image_cl;
+    cl_mem c = layer.output_cl;
+    im2col_ongpu(in, layer.batch, layer.c,  layer.h,  layer.w,  layer.size,  layer.stride, b); // NOTE(review): im2col.cl now takes im_offset/col_offset, not batch -- verify the host wrapper matches
+    gemm_ongpu(0,0,m,n,k,1,a,k,b,n,0,c,n);
+    activate_array_ongpu(layer.output_cl, m*n, layer.activation); // m*n elements, batch included via n
+    cl_read_array(layer.output_cl, layer.output, m*n);            // presumably copies the result back to host memory
+}
+#endif
+
diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h
index e0722f8d..f876e8b4 100644
--- a/src/convolutional_layer.h
+++ b/src/convolutional_layer.h
@@ -9,6 +9,10 @@
 #include "activations.h"
 
 typedef struct {
+    float learning_rate;
+    float momentum;
+    float decay;
+
     int batch;
     int h,w,c;
     int n;
@@ -48,10 +52,10 @@ typedef struct {
 void forward_convolutional_layer_gpu(convolutional_layer layer, cl_mem in);
 #endif
 
-convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation);
+convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay);
 void resize_convolutional_layer(convolutional_layer *layer, int h, int w, int c);
 void forward_convolutional_layer(const convolutional_layer layer, float *in);
-void update_convolutional_layer(convolutional_layer layer, float step, float momentum, float decay);
+void update_convolutional_layer(convolutional_layer layer);
 image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters);
 
 void backward_convolutional_layer(convolutional_layer layer, float *delta);
diff --git a/src/convolutional_layer_gpu.c b/src/convolutional_layer_gpu.c
new file mode 100644
index 00000000..e69de29b
diff --git a/src/data.c b/src/data.c
index 30ee9ecb..846b950a 100644
--- a/src/data.c
+++ b/src/data.c
@@ -131,6 +131,7 @@ data load_cifar10_data(char *filename)
     d.y = y;
 
     FILE *fp = fopen(filename, "rb");
+    if(!fp) file_error(filename);
     for(i = 0; i < 10000; ++i){
         unsigned char bytes[3073];
         fread(bytes, 1, 3073, fp);
@@ -140,10 +141,46 @@ data load_cifar10_data(char *filename)
             X.vals[i][j] = (double)bytes[j+1];
         }
     }
+	translate_data_rows(d, -144);
+	scale_data_rows(d, 1./128);
+	//normalize_data_rows(d);
     fclose(fp);
     return d;
 }
 
+data load_all_cifar10()
+{
+    data d;
+    d.shallow = 0;
+    int i,j,b;
+    matrix X = make_matrix(50000, 3072);
+    matrix y = make_matrix(50000, 10);
+    d.X = X;
+    d.y = y;
+
+    // Reads data_batch_1..5: 10000 records each, 1 label byte followed by 3072 pixel bytes, into rows b*10000+i.
+    for(b = 0; b < 5; ++b){
+        char buff[256];
+        sprintf(buff, "data/cifar10/data_batch_%d.bin", b+1);
+        FILE *fp = fopen(buff, "rb");
+        if(!fp) file_error(buff);
+        for(i = 0; i < 10000; ++i){
+            unsigned char bytes[3073];
+            if(fread(bytes, 1, 3073, fp) != 3073) file_error(buff); // truncated file: never index y with an unread label byte
+            int class = bytes[0];
+            y.vals[i+b*10000][class] = 1;
+            for(j = 0; j < X.cols; ++j){
+                X.vals[i+b*10000][j] = (double)bytes[j+1];
+            }
+        }
+        fclose(fp);
+    }
+    // Fixed shift then scale instead of per-row normalization; presumably maps x to (x-144)/128 -- helper semantics not shown here.
+    translate_data_rows(d, -144);
+    scale_data_rows(d, 1./128);
+    return d;
+}
+
 void randomize_data(data d)
 {
     int i;
diff --git a/src/data.h b/src/data.h
index c639d5fa..0a1830e6 100644
--- a/src/data.h
+++ b/src/data.h
@@ -18,6 +18,7 @@ data load_data_image_pathfile_part(char *filename, int part, int total,
 data load_data_image_pathfile_random(char *filename, int n, char **labels, 
                                         int k, int h, int w);
 data load_cifar10_data(char *filename);
+data load_all_cifar10();
 list *get_paths(char *filename);
 data load_categorical_data_csv(char *filename, int target, int k);
 void normalize_data_rows(data d);
diff --git a/src/dropout_layer.c b/src/dropout_layer.c
new file mode 100644
index 00000000..fcad7b9e
--- /dev/null
+++ b/src/dropout_layer.c
@@ -0,0 +1,26 @@
+#include "dropout_layer.h"
+#include "stdlib.h"
+#include "stdio.h"
+
+dropout_layer *make_dropout_layer(int batch, int inputs, float probability)
+{   // Logs the configuration to stderr and returns a calloc'ed dropout_layer holding the three arguments.
+    fprintf(stderr, "Dropout Layer: %d inputs, %f probability\n", inputs, probability);
+    dropout_layer *layer = calloc(1, sizeof(dropout_layer)); // result is not checked for NULL before the writes below
+    layer->probability = probability; // compared against a uniform draw in forward_dropout_layer
+    layer->inputs = inputs;
+    layer->batch = batch;
+    return layer; // no matching free is visible in this file; presumably the caller owns it -- TODO confirm
+} 
+
+void forward_dropout_layer(dropout_layer layer, float *input)
+{   // Applies dropout in place to the first batch*inputs floats of input; there is no train/test switch here.
+    int i;
+    for(i = 0; i < layer.batch * layer.inputs; ++i){
+        if((float)rand()/RAND_MAX < layer.probability) input[i] = 0; // zero the value when the draw falls below probability
+        else input[i] /= (1-layer.probability); // rescale kept values; NOTE(review): divides by zero if probability == 1
+    }
+}
+void backward_dropout_layer(dropout_layer layer, float *input, float *delta)
+{
+    // No-op: neither input nor delta is modified. NOTE(review): the forward pass stores no mask, so delta is not zeroed/rescaled to match -- confirm intended.
+}
diff --git a/src/dropout_layer.h b/src/dropout_layer.h
new file mode 100644
index 00000000..b164a921
--- /dev/null
+++ b/src/dropout_layer.h
@@ -0,0 +1,15 @@
+#ifndef DROPOUT_LAYER_H
+#define DROPOUT_LAYER_H
+
+typedef struct{
+    int batch;          // forward_dropout_layer visits batch*inputs floats per call
+    int inputs;         // multiplied by batch to get the element count; also printed by make_dropout_layer
+    float probability;  // threshold for the uniform draw: values whose draw falls below it are zeroed
+} dropout_layer;
+
+dropout_layer *make_dropout_layer(int batch, int inputs, float probability);
+
+void forward_dropout_layer(dropout_layer layer, float *input);
+void backward_dropout_layer(dropout_layer layer, float *input, float *delta);
+
+#endif
diff --git a/src/im2col.c b/src/im2col.c
index 89748c90..6ed9d891 100644
--- a/src/im2col.c
+++ b/src/im2col.c
@@ -51,11 +51,11 @@ void im2col_cpu_batch(float* data_im,
 
 //From Berkeley Vision's Caffe!
 //https://github.com/BVLC/caffe/blob/master/LICENSE
-void im2col_cpu(float* data_im,
+void im2col_cpu(float* data_im, const int batch,
     const int channels, const int height, const int width,
     const int ksize, const int stride, int pad, float* data_col) 
 {
-    int c,h,w;
+    int c,h,w,b;
     int height_col = (height - ksize) / stride + 1;
     int width_col = (width - ksize) / stride + 1;
     if (pad){
@@ -64,19 +64,25 @@ void im2col_cpu(float* data_im,
         pad = ksize/2;
     }
     int channels_col = channels * ksize * ksize;
-    for (c = 0; c < channels_col; ++c) {
-        int w_offset = c % ksize;
-        int h_offset = (c / ksize) % ksize;
-        int c_im = c / ksize / ksize;
-        for (h = 0; h < height_col; ++h) {
-            for (w = 0; w < width_col; ++w) {
-                int im_row = h_offset + h * stride;
-                int im_col = w_offset + w * stride;
-                int col_index = (c * height_col + h) * width_col + w;
-                data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
-                        im_row, im_col, c_im, pad);
+    int im_size = height*width*channels;
+    int col_size = height_col*width_col*channels_col;
+    for (b = 0; b < batch; ++b) {
+        for (c = 0; c < channels_col; ++c) {
+            int w_offset = c % ksize;
+            int h_offset = (c / ksize) % ksize;
+            int c_im = c / ksize / ksize;
+            for (h = 0; h < height_col; ++h) {
+                for (w = 0; w < width_col; ++w) {
+                    int im_row = h_offset + h * stride;
+                    int im_col = w_offset + w * stride;
+                    int col_index = (c * height_col + h) * width_col + w;
+                    data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
+                            im_row, im_col, c_im, pad);
+                }
             }
         }
+        data_im += im_size;
+        data_col += col_size;
     }
 }
 
diff --git a/src/im2col.cl b/src/im2col.cl
index 0226d282..765a92df 100644
--- a/src/im2col.cl
+++ b/src/im2col.cl
@@ -1,7 +1,7 @@
 
-__kernel void im2col(__global float *data_im,
-    const int batch, const int channels, const int height, const int width,
-    const int ksize, const int stride, __global float *data_col) 
+__kernel void im2col(__global float *data_im, const int im_offset,
+    const int channels, const int height, const int width,
+    const int ksize, const int stride, __global float *data_col, const int col_offset) 
 {
     int b = get_global_id(0);
     int c = get_global_id(1);
diff --git a/src/image.c b/src/image.c
index e2c451b7..b25bf05b 100644
--- a/src/image.c
+++ b/src/image.c
@@ -138,7 +138,7 @@ void show_image(image p, char *name)
     }
     free_image(copy);
     if(disp->height < 500 || disp->width < 500 || disp->height > 1000){
-        int w = 1500;
+        int w = 500;
         int h = w*p.h/p.w;
         if(h > 1000){
             h = 1000;
@@ -720,7 +720,7 @@ image collapse_images_horz(image *ims, int n)
 void show_images(image *ims, int n, char *window)
 {
     image m = collapse_images_vert(ims, n);
-    save_image(m, window);
+    //save_image(m, window);
     show_image(m, window);
     free_image(m);
 }
diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c
index 54a734a8..08c9f2f2 100644
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@@ -17,14 +17,15 @@ image get_maxpool_delta(maxpool_layer layer)
     return float_to_image(h,w,c,layer.delta);
 }
 
-maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int stride)
+maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int size, int stride)
 {
-    fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
+    fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d size, %d stride\n", h,w,c,size,stride);
     maxpool_layer *layer = calloc(1, sizeof(maxpool_layer));
     layer->batch = batch;
     layer->h = h;
     layer->w = w;
     layer->c = c;
+    layer->size = size;
     layer->stride = stride;
     layer->output = calloc(((h-1)/stride+1) * ((w-1)/stride+1) * c*batch, sizeof(float));
     layer->delta = calloc(((h-1)/stride+1) * ((w-1)/stride+1) * c*batch, sizeof(float));
@@ -40,6 +41,32 @@ void resize_maxpool_layer(maxpool_layer *layer, int h, int w, int c)
     layer->delta = realloc(layer->delta, ((h-1)/layer->stride+1) * ((w-1)/layer->stride+1) * c * layer->batch*sizeof(float));
 }
 
+/* Largest value of channel c of im inside the size x size pooling window
+ * anchored at (h, w), clipped to the image bounds.
+ * Returns -FLT_MAX when the clipped window contains no pixels. */
+float get_max_region(image im, int h, int w, int c, int size)
+{
+    int i,j;
+    /* Window covers offsets [lower, upper) from the anchor; upper-lower == size. */
+    int lower = (-size-1)/2 + 1;
+    int upper = size/2 + 1;
+
+    /* lower is <= 0 for size >= 1, so it is ADDED to reach the first row/column. */
+    int lh = (h+lower < 0)      ? 0 : h+lower;
+    int uh = (h+upper > im.h)   ? im.h : h+upper;
+
+    int lw = (w+lower < 0)      ? 0 : w+lower;
+    int uw = (w+upper > im.w)   ? im.w : w+upper;
+
+    float max = -FLT_MAX;
+    for(i = lh; i < uh; ++i){
+        for(j = lw; j < uw; ++j){
+            float val = get_pixel(im, i, j, c);
+            if (val > max) max = val;
+        }
+    }
+    return max;
+}
+
 void forward_maxpool_layer(const maxpool_layer layer, float *in)
 {
     int b;
@@ -52,19 +79,40 @@ void forward_maxpool_layer(const maxpool_layer layer, float *in)
         image output = float_to_image(h,w,c,layer.output+b*h*w*c);
 
         int i,j,k;
-        for(i = 0; i < output.h*output.w*output.c; ++i) output.data[i] = -DBL_MAX;
         for(k = 0; k < input.c; ++k){
-            for(i = 0; i < input.h; ++i){
-                for(j = 0; j < input.w; ++j){
-                    float val = get_pixel(input, i, j, k);
-                    float cur = get_pixel(output, i/layer.stride, j/layer.stride, k);
-                    if(val > cur) set_pixel(output, i/layer.stride, j/layer.stride, k, val);
+            for(i = 0; i < input.h; i += layer.stride){
+                for(j = 0; j < input.w; j += layer.stride){
+                    float max = get_max_region(input, i, j, k, layer.size);
+                    set_pixel(output, i/layer.stride, j/layer.stride, k, max);
                 }
             }
         }
     }
 }
 
+/* Backward helper: adds error to delta at every pixel of channel c inside
+ * the size x size window anchored at (h, w) of im whose value equals max
+ * (tied pixels all receive the error). Returns max unchanged. */
+float set_max_region_delta(image im, image delta, int h, int w, int c, int size, float max, float error)
+{
+    int i,j;
+    int lower = (-size-1)/2 + 1;
+    int upper = size/2 + 1;
+    /* Window spans offsets [lower, upper) from the anchor; lower <= 0, so it is added. */
+    int lh = (h+lower < 0)      ? 0 : h+lower;
+    int uh = (h+upper > im.h)   ? im.h : h+upper;
+    int lw = (w+lower < 0)      ? 0 : w+lower;
+    int uw = (w+upper > im.w)   ? im.w : w+upper;
+
+    for(i = lh; i < uh; ++i){
+        for(j = lw; j < uw; ++j){
+            float val = get_pixel(im, i, j, c);
+            if (val == max) add_pixel(delta, i, j, c, error);
+        }
+    }
+    return max;
+}
+
 void backward_maxpool_layer(const maxpool_layer layer, float *in, float *delta)
 {
     int b;
@@ -76,18 +124,15 @@ void backward_maxpool_layer(const maxpool_layer layer, float *in, float *delta)
         int c = layer.c;
         image output = float_to_image(h,w,c,layer.output+b*h*w*c);
         image output_delta = float_to_image(h,w,c,layer.delta+b*h*w*c);
+        zero_image(input_delta);
 
         int i,j,k;
         for(k = 0; k < input.c; ++k){
-            for(i = 0; i < input.h; ++i){
-                for(j = 0; j < input.w; ++j){
-                    float val = get_pixel(input, i, j, k);
-                    float cur = get_pixel(output, i/layer.stride, j/layer.stride, k);
-                    float d = get_pixel(output_delta, i/layer.stride, j/layer.stride, k);
-                    if(val == cur) {
-                        set_pixel(input_delta, i, j, k, d);
-                    }
-                    else set_pixel(input_delta, i, j, k, 0);
+            for(i = 0; i < input.h; i += layer.stride){
+                for(j = 0; j < input.w; j += layer.stride){
+                    float max = get_pixel(output, i/layer.stride, j/layer.stride, k);
+                    float error = get_pixel(output_delta, i/layer.stride, j/layer.stride, k);
+                    set_max_region_delta(input, input_delta, i, j, k, layer.size, max, error);
                 }
             }
         }
diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h
index 92d41e66..cde84458 100644
--- a/src/maxpool_layer.h
+++ b/src/maxpool_layer.h
@@ -7,12 +7,13 @@ typedef struct {
     int batch;
     int h,w,c;
     int stride;
+    int size;
     float *delta;
     float *output;
 } maxpool_layer;
 
 image get_maxpool_image(maxpool_layer layer);
-maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int stride);
+maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int size, int stride);
 void resize_maxpool_layer(maxpool_layer *layer, int h, int w, int c);
 void forward_maxpool_layer(const maxpool_layer layer, float *in);
 void backward_maxpool_layer(const maxpool_layer layer, float *in, float *delta);
diff --git a/src/mini_blas.h b/src/mini_blas.h
index 95e924bf..c80e6ad5 100644
--- a/src/mini_blas.h
+++ b/src/mini_blas.h
@@ -25,7 +25,7 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
         cl_mem C_gpu, int ldc);
 #endif
 
-void im2col_cpu(float* data_im,
+void im2col_cpu(float* data_im, const int batch,
     const int channels, const int height, const int width,
     const int ksize, const int stride, int pad, float* data_col);
 
diff --git a/src/network.c b/src/network.c
index 70883989..ed927a8c 100644
--- a/src/network.c
+++ b/src/network.c
@@ -9,6 +9,7 @@
 #include "maxpool_layer.h"
 #include "normalization_layer.h"
 #include "softmax_layer.h"
+#include "dropout_layer.h"
 
 network make_network(int n, int batch)
 {
@@ -25,94 +26,6 @@ network make_network(int n, int batch)
     return net;
 }
 
-void print_convolutional_cfg(FILE *fp, convolutional_layer *l, int first)
-{
-    int i;
-    fprintf(fp, "[convolutional]\n");
-    if(first) fprintf(fp,   "batch=%d\n"
-                            "height=%d\n"
-                            "width=%d\n"
-                            "channels=%d\n",
-                            l->batch,l->h, l->w, l->c);
-    fprintf(fp, "filters=%d\n"
-                "size=%d\n"
-                "stride=%d\n"
-                "activation=%s\n",
-                l->n, l->size, l->stride,
-                get_activation_string(l->activation));
-    fprintf(fp, "data=");
-    for(i = 0; i < l->n; ++i) fprintf(fp, "%g,", l->biases[i]);
-    for(i = 0; i < l->n*l->c*l->size*l->size; ++i) fprintf(fp, "%g,", l->filters[i]);
-    fprintf(fp, "\n\n");
-}
-void print_connected_cfg(FILE *fp, connected_layer *l, int first)
-{
-    int i;
-    fprintf(fp, "[connected]\n");
-    if(first) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
-    fprintf(fp, "output=%d\n"
-            "activation=%s\n",
-            l->outputs,
-            get_activation_string(l->activation));
-    fprintf(fp, "data=");
-    for(i = 0; i < l->outputs; ++i) fprintf(fp, "%g,", l->biases[i]);
-    for(i = 0; i < l->inputs*l->outputs; ++i) fprintf(fp, "%g,", l->weights[i]);
-    fprintf(fp, "\n\n");
-}
-
-void print_maxpool_cfg(FILE *fp, maxpool_layer *l, int first)
-{
-    fprintf(fp, "[maxpool]\n");
-    if(first) fprintf(fp,   "batch=%d\n"
-            "height=%d\n"
-            "width=%d\n"
-            "channels=%d\n",
-            l->batch,l->h, l->w, l->c);
-    fprintf(fp, "stride=%d\n\n", l->stride);
-}
-
-void print_normalization_cfg(FILE *fp, normalization_layer *l, int first)
-{
-    fprintf(fp, "[localresponsenormalization]\n");
-    if(first) fprintf(fp,   "batch=%d\n"
-            "height=%d\n"
-            "width=%d\n"
-            "channels=%d\n",
-            l->batch,l->h, l->w, l->c);
-    fprintf(fp, "size=%d\n"
-                "alpha=%g\n"
-                "beta=%g\n"
-                "kappa=%g\n\n", l->size, l->alpha, l->beta, l->kappa);
-}
-
-void print_softmax_cfg(FILE *fp, softmax_layer *l, int first)
-{
-    fprintf(fp, "[softmax]\n");
-    if(first) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
-    fprintf(fp, "\n");
-}
-
-void save_network(network net, char *filename)
-{
-    FILE *fp = fopen(filename, "w");
-    if(!fp) file_error(filename);
-    int i;
-    for(i = 0; i < net.n; ++i)
-    {
-        if(net.types[i] == CONVOLUTIONAL)
-            print_convolutional_cfg(fp, (convolutional_layer *)net.layers[i], i==0);
-        else if(net.types[i] == CONNECTED)
-            print_connected_cfg(fp, (connected_layer *)net.layers[i], i==0);
-        else if(net.types[i] == MAXPOOL)
-            print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], i==0);
-        else if(net.types[i] == NORMALIZATION)
-            print_normalization_cfg(fp, (normalization_layer *)net.layers[i], i==0);
-        else if(net.types[i] == SOFTMAX)
-            print_softmax_cfg(fp, (softmax_layer *)net.layers[i], i==0);
-    }
-    fclose(fp);
-}
-
 #ifdef GPU
 void forward_network(network net, float *input, int train)
 {
@@ -169,7 +82,7 @@ void forward_network(network net, float *input, int train)
         }
         else if(net.types[i] == CONNECTED){
             connected_layer layer = *(connected_layer *)net.layers[i];
-            forward_connected_layer(layer, input, train);
+            forward_connected_layer(layer, input);
             input = layer.output;
         }
         else if(net.types[i] == SOFTMAX){
@@ -187,17 +100,22 @@ void forward_network(network net, float *input, int train)
             forward_normalization_layer(layer, input);
             input = layer.output;
         }
+        else if(net.types[i] == DROPOUT){
+            if(!train) continue;
+            dropout_layer layer = *(dropout_layer *)net.layers[i];
+            forward_dropout_layer(layer, input);
+        }
     }
 }
 #endif
 
-void update_network(network net, float step, float momentum, float decay)
+void update_network(network net)
 {
     int i;
     for(i = 0; i < net.n; ++i){
         if(net.types[i] == CONVOLUTIONAL){
             convolutional_layer layer = *(convolutional_layer *)net.layers[i];
-            update_convolutional_layer(layer, step, momentum, decay);
+            update_convolutional_layer(layer);
         }
         else if(net.types[i] == MAXPOOL){
             //maxpool_layer layer = *(maxpool_layer *)net.layers[i];
@@ -210,7 +128,7 @@ void update_network(network net, float step, float momentum, float decay)
         }
         else if(net.types[i] == CONNECTED){
             connected_layer layer = *(connected_layer *)net.layers[i];
-            update_connected_layer(layer, step, momentum, decay);
+            update_connected_layer(layer);
         }
     }
 }
@@ -226,6 +144,8 @@ float *get_network_output_layer(network net, int i)
     } else if(net.types[i] == SOFTMAX){
         softmax_layer layer = *(softmax_layer *)net.layers[i];
         return layer.output;
+    } else if(net.types[i] == DROPOUT){
+        return get_network_output_layer(net, i-1);
     } else if(net.types[i] == CONNECTED){
         connected_layer layer = *(connected_layer *)net.layers[i];
         return layer.output;
@@ -251,6 +171,8 @@ float *get_network_delta_layer(network net, int i)
     } else if(net.types[i] == SOFTMAX){
         softmax_layer layer = *(softmax_layer *)net.layers[i];
         return layer.delta;
+    } else if(net.types[i] == DROPOUT){
+        return get_network_delta_layer(net, i-1);
     } else if(net.types[i] == CONNECTED){
         connected_layer layer = *(connected_layer *)net.layers[i];
         return layer.delta;
@@ -326,17 +248,17 @@ float backward_network(network net, float *input, float *truth)
     return error;
 }
 
-float train_network_datum(network net, float *x, float *y, float step, float momentum, float decay)
+float train_network_datum(network net, float *x, float *y)
 {
     forward_network(net, x, 1);
     //int class = get_predicted_class_network(net);
     float error = backward_network(net, x, y);
-    update_network(net, step, momentum, decay);
+    update_network(net);
     //return (y[class]?1:0);
     return error;
 }
 
-float train_network_sgd(network net, data d, int n, float step, float momentum,float decay)
+float train_network_sgd(network net, data d, int n)
 {
     int batch = net.batch;
     float *X = calloc(batch*d.X.cols, sizeof(float));
@@ -350,9 +272,9 @@ float train_network_sgd(network net, data d, int n, float step, float momentum,f
             memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float));
             memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float));
         }
-        float err = train_network_datum(net, X, y, step, momentum, decay);
+        float err = train_network_datum(net, X, y);
         sum += err;
-        //train_network_datum(net, X, y, step, momentum, decay);
+        //train_network_datum(net, X, y);
         /*
         float *y = d.y.vals[index];
         int class = get_predicted_class_network(net);
@@ -382,7 +304,7 @@ float train_network_sgd(network net, data d, int n, float step, float momentum,f
     free(y);
     return (float)sum/(n*batch);
 }
-float train_network_batch(network net, data d, int n, float step, float momentum,float decay)
+float train_network_batch(network net, data d, int n)
 {
     int i,j;
     float sum = 0;
@@ -395,18 +317,18 @@ float train_network_batch(network net, data d, int n, float step, float momentum
             forward_network(net, x, 1);
             sum += backward_network(net, x, y);
         }
-        update_network(net, step, momentum, decay);
+        update_network(net);
     }
     return (float)sum/(n*batch);
 }
 
 
-void train_network(network net, data d, float step, float momentum, float decay)
+void train_network(network net, data d)
 {
     int i;
     int correct = 0;
     for(i = 0; i < d.X.rows; ++i){
-        correct += train_network_datum(net, d.X.vals[i], d.y.vals[i], step, momentum, decay);
+        correct += train_network_datum(net, d.X.vals[i], d.y.vals[i]);
         if(i%100 == 0){
             visualize_network(net);
             cvWaitKey(10);
@@ -430,6 +352,9 @@ int get_network_input_size_layer(network net, int i)
     else if(net.types[i] == CONNECTED){
         connected_layer layer = *(connected_layer *)net.layers[i];
         return layer.inputs;
+    } else if(net.types[i] == DROPOUT){
+        dropout_layer layer = *(dropout_layer *) net.layers[i];
+        return layer.inputs;
     }
     else if(net.types[i] == SOFTMAX){
         softmax_layer layer = *(softmax_layer *)net.layers[i];
@@ -453,6 +378,9 @@ int get_network_output_size_layer(network net, int i)
     else if(net.types[i] == CONNECTED){
         connected_layer layer = *(connected_layer *)net.layers[i];
         return layer.outputs;
+    } else if(net.types[i] == DROPOUT){
+        dropout_layer layer = *(dropout_layer *) net.layers[i];
+        return layer.inputs;
     }
     else if(net.types[i] == SOFTMAX){
         softmax_layer layer = *(softmax_layer *)net.layers[i];
diff --git a/src/network.h b/src/network.h
index 35a58ca9..a9a6797d 100644
--- a/src/network.h
+++ b/src/network.h
@@ -11,12 +11,16 @@ typedef enum {
     CONNECTED,
     MAXPOOL,
     SOFTMAX,
-    NORMALIZATION
+    NORMALIZATION,
+    DROPOUT
 } LAYER_TYPE;
 
 typedef struct {
     int n;
     int batch;
+    float learning_rate;
+    float momentum;
+    float decay;
     void **layers;
     LAYER_TYPE *types;
     int outputs;
@@ -31,10 +35,10 @@ typedef struct {
 network make_network(int n, int batch);
 void forward_network(network net, float *input, int train);
 float backward_network(network net, float *input, float *truth);
-void update_network(network net, float step, float momentum, float decay);
-float train_network_sgd(network net, data d, int n, float step, float momentum,float decay);
-float train_network_batch(network net, data d, int n, float step, float momentum,float decay);
-void train_network(network net, data d, float step, float momentum, float decay);
+void update_network(network net);
+float train_network_sgd(network net, data d, int n);
+float train_network_batch(network net, data d, int n);
+void train_network(network net, data d);
 matrix network_predict_data(network net, data test);
 float network_accuracy(network net, data d);
 float *get_network_output(network net);
@@ -48,7 +52,6 @@ image get_network_image_layer(network net, int i);
 int get_predicted_class_network(network net);
 void print_network(network net);
 void visualize_network(network net);
-void save_network(network net, char *filename);
 int resize_network(network net, int h, int w, int c);
 int get_network_input_size(network net);
 
diff --git a/src/normalization_layer.c b/src/normalization_layer.c
index 2d844e0e..67d873c9 100644
--- a/src/normalization_layer.c
+++ b/src/normalization_layer.c
@@ -72,7 +72,7 @@ void forward_normalization_layer(const normalization_layer layer, float *in)
         int next = k+layer.size/2;
         int prev = k-layer.size/2-1;
         if(next < layer.c) add_square_array(in+next*imsize, layer.sums, imsize);
-        if(prev > 0)        sub_square_array(in+prev*imsize, layer.sums, imsize);
+        if(prev > 0)       sub_square_array(in+prev*imsize, layer.sums, imsize);
         for(i = 0; i < imsize; ++i){
             layer.output[k*imsize + i] = in[k*imsize+i] / pow(layer.kappa + layer.alpha * layer.sums[i], layer.beta);
         }
diff --git a/src/opencl.c b/src/opencl.c
index d78537b4..8f9edd3c 100644
--- a/src/opencl.c
+++ b/src/opencl.c
@@ -110,6 +110,15 @@ void cl_copy_array(cl_mem src, cl_mem dst, int n)
     check_error(cl);
 }
 
+/* Sub-buffer of size floats starting offset floats into src; flags 0 inherits src's flags. */
+cl_mem cl_sub_array(cl_mem src, int offset, int size)
+{
+    cl_buffer_region r = { .origin = offset*sizeof(float), .size = size*sizeof(float) };
+    cl_mem sub = clCreateSubBuffer(src, 0, CL_BUFFER_CREATE_TYPE_REGION, &r, &cl.error);
+    check_error(cl); /* host-pointer flags are invalid for sub-buffers; surface any failure */
+    return sub;
+}
+
 cl_mem cl_make_array(float *x, int n)
 {
     cl_setup();
diff --git a/src/opencl.h b/src/opencl.h
index a7ee0bdb..9cf3acd4 100644
--- a/src/opencl.h
+++ b/src/opencl.h
@@ -25,5 +25,6 @@ void cl_read_array(cl_mem mem, float *x, int n);
 void cl_write_array(cl_mem mem, float *x, int n);
 cl_mem cl_make_array(float *x, int n);
 void cl_copy_array(cl_mem src, cl_mem dst, int n);
+cl_mem cl_sub_array(cl_mem src, int offset, int size);
 #endif
 #endif
diff --git a/src/option_list.c b/src/option_list.c
index bb8b7101..76e10166 100644
--- a/src/option_list.c
+++ b/src/option_list.c
@@ -53,6 +53,13 @@ int option_find_int(list *l, char *key, int def)
     return def;
 }
 
+/* Value found for key in l parsed with atof, or def when option_find yields NULL. */
+float option_find_float_quiet(list *l, char *key, float def)
+{
+    char *text = option_find(l, key);
+    return text ? atof(text) : def;
+}
+
 float option_find_float(list *l, char *key, float def)
 {
     char *v = option_find(l, key);
diff --git a/src/option_list.h b/src/option_list.h
index 26cd36fc..fa795f3e 100644
--- a/src/option_list.h
+++ b/src/option_list.h
@@ -14,6 +14,7 @@ char *option_find(list *l, char *key);
 char *option_find_str(list *l, char *key, char *def);
 int option_find_int(list *l, char *key, int def);
 float option_find_float(list *l, char *key, float def);
+float option_find_float_quiet(list *l, char *key, float def);
 void option_unused(list *l);
 
 #endif
diff --git a/src/parser.c b/src/parser.c
index b008882d..16563465 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -9,6 +9,7 @@
 #include "maxpool_layer.h"
 #include "normalization_layer.h"
 #include "softmax_layer.h"
+#include "dropout_layer.h"
 #include "list.h"
 #include "option_list.h"
 #include "utils.h"
@@ -21,6 +22,7 @@ typedef struct{
 int is_convolutional(section *s);
 int is_connected(section *s);
 int is_maxpool(section *s);
+int is_dropout(section *s);
 int is_softmax(section *s);
 int is_normalization(section *s);
 list *read_cfg(char *filename);
@@ -41,10 +43,11 @@ void free_section(section *s)
     free(s);
 }
 
-convolutional_layer *parse_convolutional(list *options, network net, int count)
+convolutional_layer *parse_convolutional(list *options, network *net, int count)
 {
     int i;
     int h,w,c;
+    float learning_rate, momentum, decay;
     int n = option_find_int(options, "filters",1);
     int size = option_find_int(options, "size",1);
     int stride = option_find_int(options, "stride",1);
@@ -52,18 +55,27 @@ convolutional_layer *parse_convolutional(list *options, network net, int count)
     char *activation_s = option_find_str(options, "activation", "sigmoid");
     ACTIVATION activation = get_activation(activation_s);
     if(count == 0){
+        learning_rate = option_find_float(options, "learning_rate", .001);
+        momentum = option_find_float(options, "momentum", .9);
+        decay = option_find_float(options, "decay", .0001);
         h = option_find_int(options, "height",1);
         w = option_find_int(options, "width",1);
         c = option_find_int(options, "channels",1);
-        net.batch = option_find_int(options, "batch",1);
+        net->batch = option_find_int(options, "batch",1);
+        net->learning_rate = learning_rate;
+        net->momentum = momentum;
+        net->decay = decay;
     }else{
-        image m =  get_network_image_layer(net, count-1);
+        learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate);
+        momentum = option_find_float_quiet(options, "momentum", net->momentum);
+        decay = option_find_float_quiet(options, "decay", net->decay);
+        image m =  get_network_image_layer(*net, count-1);
         h = m.h;
         w = m.w;
         c = m.c;
         if(h == 0) error("Layer before convolutional layer must output image.");
     }
-    convolutional_layer *layer = make_convolutional_layer(net.batch,h,w,c,n,size,stride,pad,activation);
+    convolutional_layer *layer = make_convolutional_layer(net->batch,h,w,c,n,size,stride,pad,activation,learning_rate,momentum,decay);
     char *data = option_find_str(options, "data", 0);
     if(data){
         char *curr = data;
@@ -81,25 +93,60 @@ convolutional_layer *parse_convolutional(list *options, network net, int count)
             curr = next+1;
         }
     }
+    char *weights = option_find_str(options, "weights", 0);
+    char *biases = option_find_str(options, "biases", 0);
+    if(biases){
+        char *curr = biases;
+        char *next = biases;
+        int done = 0;
+        for(i = 0; i < n && !done; ++i){
+            while(*++next !='\0' && *next != ',');
+            if(*next == '\0') done = 1;
+            *next = '\0';
+            sscanf(curr, "%g", &layer->biases[i]);
+            curr = next+1;
+        }
+    }
+    if(weights){
+        char *curr = weights;
+        char *next = weights;
+        int done = 0;
+        for(i = 0; i < c*n*size*size && !done; ++i){
+            while(*++next !='\0' && *next != ',');
+            if(*next == '\0') done = 1;
+            *next = '\0';
+            sscanf(curr, "%g", &layer->filters[i]);
+            curr = next+1;
+        }
+    }
     option_unused(options);
     return layer;
 }
 
-connected_layer *parse_connected(list *options, network net, int count)
+connected_layer *parse_connected(list *options, network *net, int count)
 {
     int i;
     int input;
+    float learning_rate, momentum, decay;
     int output = option_find_int(options, "output",1);
-    float dropout = option_find_float(options, "dropout", 0.);
     char *activation_s = option_find_str(options, "activation", "sigmoid");
     ACTIVATION activation = get_activation(activation_s);
     if(count == 0){
         input = option_find_int(options, "input",1);
-        net.batch = option_find_int(options, "batch",1);
+        net->batch = option_find_int(options, "batch",1);
+        learning_rate = option_find_float(options, "learning_rate", .001);
+        momentum = option_find_float(options, "momentum", .9);
+        decay = option_find_float(options, "decay", .0001);
+        net->learning_rate = learning_rate;
+        net->momentum = momentum;
+        net->decay = decay;
     }else{
-        input =  get_network_output_size_layer(net, count-1);
+        learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate);
+        momentum = option_find_float_quiet(options, "momentum", net->momentum);
+        decay = option_find_float_quiet(options, "decay", net->decay);
+        input =  get_network_output_size_layer(*net, count-1);
     }
-    connected_layer *layer = make_connected_layer(net.batch, input, output, dropout, activation);
+    connected_layer *layer = make_connected_layer(net->batch, input, output, activation,learning_rate,momentum,decay);
     char *data = option_find_str(options, "data", 0);
     if(data){
         char *curr = data;
@@ -121,42 +168,58 @@ connected_layer *parse_connected(list *options, network net, int count)
     return layer;
 }
 
-softmax_layer *parse_softmax(list *options, network net, int count)
+softmax_layer *parse_softmax(list *options, network *net, int count)
 {
     int input;
     if(count == 0){
         input = option_find_int(options, "input",1);
-        net.batch = option_find_int(options, "batch",1);
+        net->batch = option_find_int(options, "batch",1);
     }else{
-        input =  get_network_output_size_layer(net, count-1);
+        input =  get_network_output_size_layer(*net, count-1);
     }
-    softmax_layer *layer = make_softmax_layer(net.batch, input);
+    softmax_layer *layer = make_softmax_layer(net->batch, input);
     option_unused(options);
     return layer;
 }
 
-maxpool_layer *parse_maxpool(list *options, network net, int count)
+maxpool_layer *parse_maxpool(list *options, network *net, int count)
 {
     int h,w,c;
     int stride = option_find_int(options, "stride",1);
+    int size = option_find_int(options, "size",stride);
     if(count == 0){
         h = option_find_int(options, "height",1);
         w = option_find_int(options, "width",1);
         c = option_find_int(options, "channels",1);
-        net.batch = option_find_int(options, "batch",1);
+        net->batch = option_find_int(options, "batch",1);
     }else{
-        image m =  get_network_image_layer(net, count-1);
+        image m =  get_network_image_layer(*net, count-1);
         h = m.h;
         w = m.w;
         c = m.c;
         if(h == 0) error("Layer before convolutional layer must output image.");
     }
-    maxpool_layer *layer = make_maxpool_layer(net.batch,h,w,c,stride);
+    maxpool_layer *layer = make_maxpool_layer(net->batch,h,w,c,size,stride);
     option_unused(options);
     return layer;
 }
 
-normalization_layer *parse_normalization(list *options, network net, int count)
+/* Parses a [dropout] cfg section into a dropout layer.
+ * First layer (count == 0): batch and input are read from the section and
+ * batch is stored on net. Otherwise input is the output size of the
+ * previous layer (count-1). "probability" defaults to .5. */
+dropout_layer *parse_dropout(list *options, network *net, int count)
+{
+    float probability = option_find_float(options, "probability", .5);
+    if(count == 0) net->batch = option_find_int(options, "batch",1);
+    int input = (count == 0) ? option_find_int(options, "input",1)
+                             : get_network_output_size_layer(*net, count-1);
+    dropout_layer *layer = make_dropout_layer(net->batch,input,probability);
+    option_unused(options);
+    return layer;
+}
+
+normalization_layer *parse_normalization(list *options, network *net, int count)
 {
     int h,w,c;
     int size = option_find_int(options, "size",1);
@@ -167,15 +230,15 @@ normalization_layer *parse_normalization(list *options, network net, int count)
         h = option_find_int(options, "height",1);
         w = option_find_int(options, "width",1);
         c = option_find_int(options, "channels",1);
-        net.batch = option_find_int(options, "batch",1);
+        net->batch = option_find_int(options, "batch",1);
     }else{
-        image m =  get_network_image_layer(net, count-1);
+        image m =  get_network_image_layer(*net, count-1);
         h = m.h;
         w = m.w;
         c = m.c;
         if(h == 0) error("Layer before convolutional layer must output image.");
     }
-    normalization_layer *layer = make_normalization_layer(net.batch,h,w,c,size, alpha, beta, kappa);
+    normalization_layer *layer = make_normalization_layer(net->batch,h,w,c,size, alpha, beta, kappa);
     option_unused(options);
     return layer;
 }
@@ -191,30 +254,29 @@ network parse_network_cfg(char *filename)
         section *s = (section *)n->val;
         list *options = s->options;
         if(is_convolutional(s)){
-            convolutional_layer *layer = parse_convolutional(options, net, count);
+            convolutional_layer *layer = parse_convolutional(options, &net, count);
             net.types[count] = CONVOLUTIONAL;
             net.layers[count] = layer;
-            net.batch = layer->batch;
         }else if(is_connected(s)){
-            connected_layer *layer = parse_connected(options, net, count);
+            connected_layer *layer = parse_connected(options, &net, count);
             net.types[count] = CONNECTED;
             net.layers[count] = layer;
-            net.batch = layer->batch;
         }else if(is_softmax(s)){
-            softmax_layer *layer = parse_softmax(options, net, count);
+            softmax_layer *layer = parse_softmax(options, &net, count);
             net.types[count] = SOFTMAX;
             net.layers[count] = layer;
-            net.batch = layer->batch;
         }else if(is_maxpool(s)){
-            maxpool_layer *layer = parse_maxpool(options, net, count);
+            maxpool_layer *layer = parse_maxpool(options, &net, count);
             net.types[count] = MAXPOOL;
             net.layers[count] = layer;
-            net.batch = layer->batch;
         }else if(is_normalization(s)){
-            normalization_layer *layer = parse_normalization(options, net, count);
+            normalization_layer *layer = parse_normalization(options, &net, count);
             net.types[count] = NORMALIZATION;
             net.layers[count] = layer;
-            net.batch = layer->batch;
+        }else if(is_dropout(s)){
+            dropout_layer *layer = parse_dropout(options, &net, count);
+            net.types[count] = DROPOUT;
+            net.layers[count] = layer;
         }else{
             fprintf(stderr, "Type not recognized: %s\n", s->type);
         }
@@ -243,6 +305,10 @@ int is_maxpool(section *s)
     return (strcmp(s->type, "[max]")==0
             || strcmp(s->type, "[maxpool]")==0);
 }
+int is_dropout(section *s)
+{
+    return !strcmp(s->type, "[dropout]"); /* 1 on an exact match, else 0 */
+}
 
 int is_softmax(section *s)
 {
@@ -308,3 +374,120 @@ list *read_cfg(char *filename)
     return sections;
 }
 
+/* Write l to fp as a [convolutional] cfg section. When count is 0 the input
+ * geometry and all training parameters are written; otherwise only training
+ * parameters that differ from net's are. Biases and weights are emitted as
+ * comma-terminated lists. */
+void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int count)
+{
+    int i;
+    int num_weights = l->n*l->c*l->size*l->size;
+    fprintf(fp, "[convolutional]\n");
+    if(count == 0){
+        fprintf(fp, "batch=%d\nheight=%d\nwidth=%d\nchannels=%d\n",
+                l->batch, l->h, l->w, l->c);
+        fprintf(fp, "learning_rate=%g\nmomentum=%g\ndecay=%g\n",
+                l->learning_rate, l->momentum, l->decay);
+    }else{
+        if(l->learning_rate != net.learning_rate)
+            fprintf(fp, "learning_rate=%g\n", l->learning_rate);
+        if(l->momentum != net.momentum)
+            fprintf(fp, "momentum=%g\n", l->momentum);
+        if(l->decay != net.decay)
+            fprintf(fp, "decay=%g\n", l->decay);
+    }
+    fprintf(fp, "filters=%d\nsize=%d\nstride=%d\npad=%d\n",
+            l->n, l->size, l->stride, l->pad);
+    fprintf(fp, "activation=%s\n", get_activation_string(l->activation));
+    /* Plain %g keeps only 6 significant digits, which changes the stored values
+     * on every save. 9 digits are enough to restore a single-precision float
+     * exactly (assumes biases/filters are float - confirm in the layer header). */
+    fprintf(fp, "biases=");
+    for(i = 0; i < l->n; ++i) fprintf(fp, "%.9g,", l->biases[i]);
+    fprintf(fp, "\n");
+    fprintf(fp, "weights=");
+    for(i = 0; i < num_weights; ++i) fprintf(fp, "%.9g,", l->filters[i]);
+    fprintf(fp, "\n\n");
+}
+/* Write l to fp as a [connected] cfg section. When count is 0 the batch, input
+ * size and all training parameters are written; otherwise only the training
+ * parameters that differ from net's are. */
+void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count)
+{
+    int i;
+    fprintf(fp, "[connected]\n");
+    if(count == 0){
+        fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
+        fprintf(fp, "learning_rate=%g\nmomentum=%g\ndecay=%g\n",
+                l->learning_rate, l->momentum, l->decay);
+    }else{
+        if(l->learning_rate != net.learning_rate)
+            fprintf(fp, "learning_rate=%g\n", l->learning_rate);
+        if(l->momentum != net.momentum)
+            fprintf(fp, "momentum=%g\n", l->momentum);
+        if(l->decay != net.decay)
+            fprintf(fp, "decay=%g\n", l->decay);
+    }
+    fprintf(fp, "output=%d\n", l->outputs);
+    fprintf(fp, "activation=%s\n", get_activation_string(l->activation));
+    /* "data" is the biases followed by the weights. %.9g is used because plain
+     * %g keeps only 6 significant digits; 9 restore a single-precision float. */
+    fprintf(fp, "data=");
+    for(i = 0; i < l->outputs; ++i) fprintf(fp, "%.9g,", l->biases[i]);
+    for(i = 0; i < l->inputs*l->outputs; ++i) fprintf(fp, "%.9g,", l->weights[i]);
+    fprintf(fp, "\n\n");
+}
+
+/* Write l to fp as a [maxpool] cfg section; input geometry only when count is 0. */
+void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count)
+{
+    fprintf(fp, "[maxpool]\n");
+    if(count == 0){
+        fprintf(fp, "batch=%d\nheight=%d\nwidth=%d\nchannels=%d\n",
+                l->batch, l->h, l->w, l->c);
+    }
+    fprintf(fp, "size=%d\nstride=%d\n\n", l->size, l->stride);
+}
+
+/* Write l to fp as a [localresponsenormalization] cfg section. The batch size
+ * and input geometry are written only when count is 0. */
+void print_normalization_cfg(FILE *fp, normalization_layer *l, network net, int count)
+{
+    fprintf(fp, "[localresponsenormalization]\n");
+    if(count == 0){
+        fprintf(fp, "batch=%d\nheight=%d\nwidth=%d\nchannels=%d\n",
+                l->batch, l->h, l->w, l->c);
+    }
+    fprintf(fp, "size=%d\n", l->size);
+    fprintf(fp, "alpha=%g\nbeta=%g\nkappa=%g\n\n",
+            l->alpha, l->beta, l->kappa);
+}
+
+/* Write l to fp as a [softmax] cfg section; batch and input only when count is 0. */
+void print_softmax_cfg(FILE *fp, softmax_layer *l, network net, int count)
+{
+    if(count != 0) fprintf(fp, "[softmax]\n\n");
+    else fprintf(fp, "[softmax]\nbatch=%d\ninput=%d\n\n", l->batch, l->inputs);
+}
+
+/* Write net to filename as cfg text: one section per layer, in network order.
+ * Layer types without a printer are reported on stderr and left out. */
+void save_network(network net, char *filename)
+{
+    int i;
+    FILE *fp = fopen(filename, "w");
+    if(!fp) file_error(filename);
+    for(i = 0; i < net.n; ++i){
+        void *layer = net.layers[i];
+        if(net.types[i] == CONVOLUTIONAL) print_convolutional_cfg(fp, layer, net, i);
+        else if(net.types[i] == CONNECTED) print_connected_cfg(fp, layer, net, i);
+        else if(net.types[i] == MAXPOOL) print_maxpool_cfg(fp, layer, net, i);
+        else if(net.types[i] == NORMALIZATION) print_normalization_cfg(fp, layer, net, i);
+        else if(net.types[i] == SOFTMAX) print_softmax_cfg(fp, layer, net, i);
+        /* e.g. DROPOUT has no printer here: say so instead of silently dropping it. */
+        else fprintf(stderr, "save_network: layer %d has no cfg printer, skipped\n", i);
+    }
+    /* fclose flushes buffered output, so a failure means the file is incomplete. */
+    if(fclose(fp) != 0) fprintf(stderr, "save_network: error writing %s\n", filename);
+}
+
diff --git a/src/parser.h b/src/parser.h
index 878baa35..891e658b 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -3,5 +3,6 @@
 #include "network.h"
 
 network parse_network_cfg(char *filename);
+void save_network(network net, char *filename); /* write net's layers to filename as cfg sections */
 
 #endif
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index 12684238..b6e9fe9e 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -1,4 +1,5 @@
 #include "softmax_layer.h"
+#include "mini_blas.h"
 #include <math.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -11,6 +12,7 @@ softmax_layer *make_softmax_layer(int batch, int inputs)
     layer->inputs = inputs;
     layer->output = calloc(inputs*batch, sizeof(float));
     layer->delta = calloc(inputs*batch, sizeof(float));
+    layer->jacobian = calloc(inputs*inputs*batch, sizeof(float));
     return layer;
 }
 
@@ -51,6 +53,28 @@ void forward_softmax_layer(const softmax_layer layer, float *input)
 
 void backward_softmax_layer(const softmax_layer layer, float *input, float *delta)
 {
+/*
+    int i,j,b;
+    for(b = 0; b < layer.batch; ++b){
+        for(i = 0; i < layer.inputs; ++i){
+            for(j = 0; j < layer.inputs; ++j){
+                int d = (i==j);
+                layer.jacobian[b*layer.inputs*layer.inputs + i*layer.inputs + j] = 
+                        layer.output[b*layer.inputs + i] * (d - layer.output[b*layer.inputs + j]);
+            }
+        }
+    }
+    for(b = 0; b < layer.batch; ++b){
+        int M = layer.inputs;
+        int N = 1;
+        int K = layer.inputs;
+        float *A = layer.jacobian + b*layer.inputs*layer.inputs;
+        float *B = layer.delta + b*layer.inputs;
+        float *C = delta + b*layer.inputs;
+        gemm(0,0,M,N,K,1,A,K,B,N,0,C,N);
+    }
+    */
+
     int i;
     for(i = 0; i < layer.inputs*layer.batch; ++i){
         delta[i] = layer.delta[i];
diff --git a/src/softmax_layer.h b/src/softmax_layer.h
index 414030c6..22752508 100644
--- a/src/softmax_layer.h
+++ b/src/softmax_layer.h
@@ -6,6 +6,7 @@ typedef struct {
     int batch;
     float *delta;
     float *output;
+    float *jacobian;
 } softmax_layer;
 
 softmax_layer *make_softmax_layer(int batch, int inputs);