From dcb000b553d051429a49c8729dc5b1af632e8532 Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Wed, 11 Mar 2015 22:20:15 -0700 Subject: [PATCH] refactoring and added DARK ZONE --- src/captcha.c | 2 +- src/connected_layer.c | 80 ++---- src/connected_layer.h | 20 +- src/convolutional_kernels.cu | 33 +-- src/convolutional_layer.c | 34 ++- src/convolutional_layer.h | 19 +- src/cost_layer.c | 36 +-- src/cost_layer.h | 9 +- src/crop_layer.c | 6 +- src/crop_layer.h | 5 +- src/crop_layer_kernels.cu | 6 +- src/data.c | 86 ++++-- src/data.h | 4 +- src/deconvolutional_kernels.cu | 26 +- src/deconvolutional_layer.c | 32 +-- src/deconvolutional_layer.h | 19 +- src/detection.c | 37 ++- src/detection_layer.c | 95 +++++-- src/detection_layer.h | 10 +- src/dropout_layer.c | 21 +- src/dropout_layer.h | 11 +- src/dropout_layer_kernels.cu | 18 +- src/freeweight_layer.c | 25 -- src/freeweight_layer.h | 14 - src/maxpool_layer.c | 10 +- src/maxpool_layer.h | 9 +- src/maxpool_layer_kernels.cu | 8 +- src/network.c | 197 +++++--------- src/network.h | 12 +- src/network_kernels.cu | 133 ++++----- src/normalization_layer.c | 15 +- src/normalization_layer.h | 5 +- src/params.h | 12 + src/parser.c | 474 +++++++++++---------------------- src/softmax_layer.c | 10 +- src/softmax_layer.h | 11 +- src/softmax_layer_kernels.cu | 14 +- 37 files changed, 640 insertions(+), 918 deletions(-) delete mode 100644 src/freeweight_layer.c delete mode 100644 src/freeweight_layer.h create mode 100644 src/params.h diff --git a/src/captcha.c b/src/captcha.c index 17b3f062..40a4082d 100644 --- a/src/captcha.c +++ b/src/captcha.c @@ -16,7 +16,7 @@ void train_captcha(char *cfgfile, char *weightfile) printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); int imgs = 1024; int i = net.seen/imgs; - list *plist = get_paths("/data/captcha/train.list"); + list *plist = get_paths("/data/captcha/train.base"); char **paths = (char **)list_to_array(plist); printf("%d\n", plist->size); clock_t time; diff --git a/src/connected_layer.c b/src/connected_layer.c index 642570c9..9df0e8fd 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -9,15 +9,11 @@ #include #include -connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay) +connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation) { int i; connected_layer *layer = calloc(1, sizeof(connected_layer)); - layer->learning_rate = learning_rate; - layer->momentum = momentum; - layer->decay = decay; - layer->inputs = inputs; layer->outputs = outputs; layer->batch=batch; @@ -59,41 +55,17 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVA return layer; } -void secret_update_connected_layer(connected_layer *layer) +void update_connected_layer(connected_layer layer, float learning_rate, float momentum, float decay) { - int n = layer->outputs*layer->inputs; - float dot = dot_cpu(n, layer->weight_updates, 1, layer->weight_prev, 1); - float mag = sqrt(dot_cpu(n, layer->weight_updates, 1, layer->weight_updates, 1)) - * sqrt(dot_cpu(n, layer->weight_prev, 1, layer->weight_prev, 1)); - float cos = dot/mag; - if(cos > .3) layer->learning_rate *= 1.1; - else if (cos < -.3) layer-> learning_rate /= 1.1; + axpy_cpu(layer.outputs, learning_rate, layer.bias_updates, 1, layer.biases, 1); + scal_cpu(layer.outputs, momentum, layer.bias_updates, 1); - scal_cpu(n, layer->momentum, layer->weight_prev, 1); - axpy_cpu(n, 1, layer->weight_updates, 1, layer->weight_prev, 1); - scal_cpu(n, 0, layer->weight_updates, 1); - - scal_cpu(layer->outputs, layer->momentum, layer->bias_prev, 1); - axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1); - scal_cpu(layer->outputs, 0, layer->bias_updates, 1); - - axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1); - - axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1); - axpy_cpu(layer->inputs*layer->outputs, layer->learning_rate, layer->weight_prev, 1, layer->weights, 1); + axpy_cpu(layer.inputs*layer.outputs, -decay, layer.weights, 1, layer.weight_updates, 1); + axpy_cpu(layer.inputs*layer.outputs, learning_rate, layer.weight_updates, 1, layer.weights, 1); + scal_cpu(layer.inputs*layer.outputs, momentum, layer.weight_updates, 1); } -void update_connected_layer(connected_layer layer) -{ - axpy_cpu(layer.outputs, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1); - scal_cpu(layer.outputs, layer.momentum, layer.bias_updates, 1); - - axpy_cpu(layer.inputs*layer.outputs, -layer.decay, layer.weights, 1, layer.weight_updates, 1); - axpy_cpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates, 1, layer.weights, 1); - scal_cpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates, 1); -} - -void forward_connected_layer(connected_layer layer, float *input) +void forward_connected_layer(connected_layer layer, network_state state) { int i; for(i = 0; i < layer.batch; ++i){ @@ -102,14 +74,14 @@ void forward_connected_layer(connected_layer layer, float *input) int m = layer.batch; int k = layer.inputs; int n = layer.outputs; - float *a = input; + float *a = state.input; float *b = layer.weights; float *c = layer.output; gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); activate_array(layer.output, layer.outputs*layer.batch, layer.activation); } -void backward_connected_layer(connected_layer layer, float *input, float *delta) +void backward_connected_layer(connected_layer layer, network_state state) { int i; float alpha = 1./layer.batch; @@ -120,7 +92,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta) int m = layer.inputs; int k = layer.batch; int n = layer.outputs; - float *a = input; + float *a = state.input; float *b = layer.delta; float *c = layer.weight_updates; gemm(1,0,m,n,k,alpha,a,m,b,n,1,c,n); @@ -131,7 +103,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta) a = layer.delta; b = layer.weights; - c = delta; + c = state.delta; if(c) gemm(0,1,m,n,k,1,a,k,b,k,0,c,n); } @@ -154,23 +126,17 @@ void push_connected_layer(connected_layer layer) cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.outputs); } -void update_connected_layer_gpu(connected_layer layer) +void update_connected_layer_gpu(connected_layer layer, float learning_rate, float momentum, float decay) { -/* - cuda_pull_array(layer.weights_gpu, layer.weights, layer.inputs*layer.outputs); - cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.inputs*layer.outputs); - printf("Weights: %f updates: %f\n", mag_array(layer.weights, layer.inputs*layer.outputs), layer.learning_rate*mag_array(layer.weight_updates, layer.inputs*layer.outputs)); -*/ + axpy_ongpu(layer.outputs, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); + scal_ongpu(layer.outputs, momentum, layer.bias_updates_gpu, 1); - axpy_ongpu(layer.outputs, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); - scal_ongpu(layer.outputs, layer.momentum, layer.bias_updates_gpu, 1); - - axpy_ongpu(layer.inputs*layer.outputs, -layer.decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); - axpy_ongpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); - scal_ongpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates_gpu, 1); + axpy_ongpu(layer.inputs*layer.outputs, -decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); + axpy_ongpu(layer.inputs*layer.outputs, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); + scal_ongpu(layer.inputs*layer.outputs, momentum, layer.weight_updates_gpu, 1); } -void forward_connected_layer_gpu(connected_layer layer, float * input) +void forward_connected_layer_gpu(connected_layer layer, network_state state) { int i; for(i = 0; i < layer.batch; ++i){ @@ -179,14 +145,14 @@ void forward_connected_layer_gpu(connected_layer layer, float * input) int m = layer.batch; int k = layer.inputs; int n = layer.outputs; - float * a = input; + float * a = state.input; float * b = layer.weights_gpu; float * c = layer.output_gpu; gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n); activate_array_ongpu(layer.output_gpu, layer.outputs*layer.batch, layer.activation); } -void backward_connected_layer_gpu(connected_layer layer, float * input, float * delta) +void backward_connected_layer_gpu(connected_layer layer, network_state state) { float alpha = 1./layer.batch; int i; @@ -197,7 +163,7 @@ void backward_connected_layer_gpu(connected_layer layer, float * input, float * int m = layer.inputs; int k = layer.batch; int n = layer.outputs; - float * a = input; + float * a = state.input; float * b = layer.delta_gpu; float * c = layer.weight_updates_gpu; gemm_ongpu(1,0,m,n,k,alpha,a,m,b,n,1,c,n); @@ -208,7 +174,7 @@ void backward_connected_layer_gpu(connected_layer layer, float * input, float * a = layer.delta_gpu; b = layer.weights_gpu; - c = delta; + c = state.delta; if(c) gemm_ongpu(0,1,m,n,k,1,a,k,b,k,0,c,n); } diff --git a/src/connected_layer.h b/src/connected_layer.h index 921f06f0..26425997 100644 --- a/src/connected_layer.h +++ b/src/connected_layer.h @@ -2,12 +2,9 @@ #define CONNECTED_LAYER_H #include "activations.h" +#include "params.h" typedef struct{ - float learning_rate; - float momentum; - float decay; - int batch; int inputs; int outputs; @@ -37,17 +34,16 @@ typedef struct{ } connected_layer; -void secret_update_connected_layer(connected_layer *layer); -connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay); +connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation); -void forward_connected_layer(connected_layer layer, float *input); -void backward_connected_layer(connected_layer layer, float *input, float *delta); -void update_connected_layer(connected_layer layer); +void forward_connected_layer(connected_layer layer, network_state state); +void backward_connected_layer(connected_layer layer, network_state state); +void update_connected_layer(connected_layer layer, float learning_rate, float momentum, float decay); #ifdef GPU -void forward_connected_layer_gpu(connected_layer layer, float * input); -void backward_connected_layer_gpu(connected_layer layer, float * input, float * delta); -void update_connected_layer_gpu(connected_layer layer); +void forward_connected_layer_gpu(connected_layer layer, network_state state); +void backward_connected_layer_gpu(connected_layer layer, network_state state); +void update_connected_layer_gpu(connected_layer layer, float learning_rate, float momentum, float decay); void push_connected_layer(connected_layer layer); void pull_connected_layer(connected_layer layer); #endif diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index bcf307f2..77304aa8 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -54,7 +54,7 @@ extern "C" void backward_bias_gpu(float *bias_updates, float *delta, int batch, check_error(cudaPeekAtLastError()); } -extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float *in) +extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state) { int i; int m = layer.n; @@ -65,7 +65,7 @@ extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float bias_output_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, n); for(i = 0; i < layer.batch; ++i){ - im2col_ongpu(in + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); + im2col_ongpu(state.input + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); float * a = layer.filters_gpu; float * b = layer.col_image_gpu; float * c = layer.output_gpu; @@ -74,7 +74,7 @@ extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation); } -extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu) +extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state) { float alpha = 1./layer.batch; int i; @@ -86,17 +86,17 @@ extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, floa gradient_array_ongpu(layer.output_gpu, m*k*layer.batch, layer.activation, layer.delta_gpu); backward_bias_gpu(layer.bias_updates_gpu, layer.delta_gpu, layer.batch, layer.n, k); - if(delta_gpu) scal_ongpu(layer.batch*layer.h*layer.w*layer.c, 0, delta_gpu, 1); + if(state.delta) scal_ongpu(layer.batch*layer.h*layer.w*layer.c, 0, state.delta, 1); for(i = 0; i < layer.batch; ++i){ float * a = layer.delta_gpu; float * b = layer.col_image_gpu; float * c = layer.filter_updates_gpu; - im2col_ongpu(in + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); + im2col_ongpu(state.input + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); gemm_ongpu(0,1,m,n,k,alpha,a + i*m*k,k,b,k,1,c,n); - if(delta_gpu){ + if(state.delta){ float * a = layer.filters_gpu; float * b = layer.delta_gpu; @@ -104,7 +104,7 @@ extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, floa gemm_ongpu(1,0,n,k,m,1,a,n,b + i*k*m,k,0,c,k); - col2im_ongpu(layer.col_image_gpu, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta_gpu + i*layer.c*layer.h*layer.w); + col2im_ongpu(layer.col_image_gpu, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, state.delta + i*layer.c*layer.h*layer.w); } } } @@ -125,22 +125,15 @@ extern "C" void push_convolutional_layer(convolutional_layer layer) cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); } -extern "C" void update_convolutional_layer_gpu(convolutional_layer layer) +extern "C" void update_convolutional_layer_gpu(convolutional_layer layer, float learning_rate, float momentum, float decay) { int size = layer.size*layer.size*layer.c*layer.n; -/* - cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size); - cuda_pull_array(layer.filters_gpu, layer.filters, size); - printf("Filter: %f updates: %f\n", mag_array(layer.filters, size), layer.learning_rate*mag_array(layer.filter_updates, size)); - */ + axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); + scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); - axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); - scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1); - - axpy_ongpu(size, -layer.decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); - axpy_ongpu(size, layer.learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); - scal_ongpu(size, layer.momentum, layer.filter_updates_gpu, 1); - //pull_convolutional_layer(layer); + axpy_ongpu(size, -decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); + axpy_ongpu(size, learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); + scal_ongpu(size, momentum, layer.filter_updates_gpu, 1); } diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 7782e3d1..ad0d1c1a 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -41,15 +41,11 @@ image get_convolutional_delta(convolutional_layer layer) return float_to_image(h,w,c,layer.delta); } -convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay) +convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation) { int i; convolutional_layer *layer = calloc(1, sizeof(convolutional_layer)); - layer->learning_rate = learning_rate; - layer->momentum = momentum; - layer->decay = decay; - layer->h = h; layer->w = w; layer->c = c; @@ -143,7 +139,7 @@ void backward_bias(float *bias_updates, float *delta, int batch, int n, int size } -void forward_convolutional_layer(const convolutional_layer layer, float *in) +void forward_convolutional_layer(const convolutional_layer layer, network_state state) { int out_h = convolutional_out_height(layer); int out_w = convolutional_out_width(layer); @@ -160,16 +156,16 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in) float *c = layer.output; for(i = 0; i < layer.batch; ++i){ - im2col_cpu(in, layer.c, layer.h, layer.w, + im2col_cpu(state.input, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, b); gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); c += n*m; - in += layer.c*layer.h*layer.w; + state.input += layer.c*layer.h*layer.w; } activate_array(layer.output, m*n*layer.batch, layer.activation); } -void backward_convolutional_layer(convolutional_layer layer, float *in, float *delta) +void backward_convolutional_layer(convolutional_layer layer, network_state state) { float alpha = 1./layer.batch; int i; @@ -181,40 +177,40 @@ void backward_convolutional_layer(convolutional_layer layer, float *in, float *d gradient_array(layer.output, m*k*layer.batch, layer.activation, layer.delta); backward_bias(layer.bias_updates, layer.delta, layer.batch, layer.n, k); - if(delta) memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); + if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); for(i = 0; i < layer.batch; ++i){ float *a = layer.delta + i*m*k; float *b = layer.col_image; float *c = layer.filter_updates; - float *im = in+i*layer.c*layer.h*layer.w; + float *im = state.input+i*layer.c*layer.h*layer.w; im2col_cpu(im, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, b); gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n); - if(delta){ + if(state.delta){ a = layer.filters; b = layer.delta + i*m*k; c = layer.col_image; gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); - col2im_cpu(layer.col_image, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta+i*layer.c*layer.h*layer.w); + col2im_cpu(layer.col_image, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, state.delta+i*layer.c*layer.h*layer.w); } } } -void update_convolutional_layer(convolutional_layer layer) +void update_convolutional_layer(convolutional_layer layer, float learning_rate, float momentum, float decay) { int size = layer.size*layer.size*layer.c*layer.n; - axpy_cpu(layer.n, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1); - scal_cpu(layer.n, layer.momentum, layer.bias_updates, 1); + axpy_cpu(layer.n, learning_rate, layer.bias_updates, 1, layer.biases, 1); + scal_cpu(layer.n, momentum, layer.bias_updates, 1); - axpy_cpu(size, -layer.decay, layer.filters, 1, layer.filter_updates, 1); - axpy_cpu(size, layer.learning_rate, layer.filter_updates, 1, layer.filters, 1); - scal_cpu(size, layer.momentum, layer.filter_updates, 1); + axpy_cpu(size, -decay, layer.filters, 1, layer.filter_updates, 1); + axpy_cpu(size, learning_rate, layer.filter_updates, 1, layer.filters, 1); + scal_cpu(size, momentum, layer.filter_updates, 1); } diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index 72f3f721..eaf15627 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -2,14 +2,11 @@ #define CONVOLUTIONAL_LAYER_H #include "cuda.h" +#include "params.h" #include "image.h" #include "activations.h" typedef struct { - float learning_rate; - float momentum; - float decay; - int batch; int h,w,c; int n; @@ -42,9 +39,9 @@ typedef struct { } convolutional_layer; #ifdef GPU -void forward_convolutional_layer_gpu(convolutional_layer layer, float * in); -void backward_convolutional_layer_gpu(convolutional_layer layer, float * in, float * delta_gpu); -void update_convolutional_layer_gpu(convolutional_layer layer); +void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state); +void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state); +void update_convolutional_layer_gpu(convolutional_layer layer, float learning_rate, float momentum, float decay); void push_convolutional_layer(convolutional_layer layer); void pull_convolutional_layer(convolutional_layer layer); @@ -53,13 +50,13 @@ void bias_output_gpu(float *output, float *biases, int batch, int n, int size); void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); #endif -convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay); +convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); void resize_convolutional_layer(convolutional_layer *layer, int h, int w); -void forward_convolutional_layer(const convolutional_layer layer, float *in); -void update_convolutional_layer(convolutional_layer layer); +void forward_convolutional_layer(const convolutional_layer layer, network_state state); +void update_convolutional_layer(convolutional_layer layer, float learning_rate, float momentum, float decay); image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters); -void backward_convolutional_layer(convolutional_layer layer, float *in, float *delta); +void backward_convolutional_layer(convolutional_layer layer, network_state state); void bias_output(float *output, float *biases, int batch, int n, int size); void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); diff --git a/src/cost_layer.c b/src/cost_layer.c index 81582751..d2c616ff 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -47,48 +47,36 @@ void push_cost_layer(cost_layer layer) cuda_push_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs); } -void forward_cost_layer(cost_layer layer, float *input, float *truth) +void forward_cost_layer(cost_layer layer, network_state state) { - if (!truth) return; - copy_cpu(layer.batch*layer.inputs, truth, 1, layer.delta, 1); - axpy_cpu(layer.batch*layer.inputs, -1, input, 1, layer.delta, 1); + if (!state.truth) return; + copy_cpu(layer.batch*layer.inputs, state.truth, 1, layer.delta, 1); + axpy_cpu(layer.batch*layer.inputs, -1, state.input, 1, layer.delta, 1); *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); //printf("cost: %f\n", *layer.output); } -void backward_cost_layer(const cost_layer layer, float *input, float *delta) +void backward_cost_layer(const cost_layer layer, network_state state) { - copy_cpu(layer.batch*layer.inputs, layer.delta, 1, delta, 1); + copy_cpu(layer.batch*layer.inputs, layer.delta, 1, state.delta, 1); } #ifdef GPU -void forward_cost_layer_gpu(cost_layer layer, float * input, float * truth) +void forward_cost_layer_gpu(cost_layer layer, network_state state) { - if (!truth) return; + if (!state.truth) return; - /* - float *in = calloc(layer.inputs*layer.batch, sizeof(float)); - float *t = calloc(layer.inputs*layer.batch, sizeof(float)); - cuda_pull_array(input, in, layer.batch*layer.inputs); - cuda_pull_array(truth, t, layer.batch*layer.inputs); - forward_cost_layer(layer, in, t); - cuda_push_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs); - free(in); - free(t); - */ - - copy_ongpu(layer.batch*layer.inputs, truth, 1, layer.delta_gpu, 1); - axpy_ongpu(layer.batch*layer.inputs, -1, input, 1, layer.delta_gpu, 1); + copy_ongpu(layer.batch*layer.inputs, state.truth, 1, layer.delta_gpu, 1); + axpy_ongpu(layer.batch*layer.inputs, -1, state.input, 1, layer.delta_gpu, 1); cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs); *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); - //printf("cost: %f\n", *layer.output); } -void backward_cost_layer_gpu(const cost_layer layer, float * input, float * delta) +void backward_cost_layer_gpu(const cost_layer layer, network_state state) { - copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, delta, 1); + copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, state.delta, 1); } #endif diff --git a/src/cost_layer.h b/src/cost_layer.h index 08554052..d4416988 100644 --- a/src/cost_layer.h +++ b/src/cost_layer.h @@ -1,5 +1,6 @@ #ifndef COST_LAYER_H #define COST_LAYER_H +#include "params.h" typedef enum{ SSE @@ -21,12 +22,12 @@ typedef struct { COST_TYPE get_cost_type(char *s); char *get_cost_string(COST_TYPE a); cost_layer *make_cost_layer(int batch, int inputs, COST_TYPE type); -void forward_cost_layer(const cost_layer layer, float *input, float *truth); -void backward_cost_layer(const cost_layer layer, float *input, float *delta); +void forward_cost_layer(const cost_layer layer, network_state state); +void backward_cost_layer(const cost_layer layer, network_state state); #ifdef GPU -void forward_cost_layer_gpu(cost_layer layer, float * input, float * truth); -void backward_cost_layer_gpu(const cost_layer layer, float * input, float * delta); +void forward_cost_layer_gpu(cost_layer layer, network_state state); +void backward_cost_layer_gpu(const cost_layer layer, network_state state); #endif #endif diff --git a/src/crop_layer.c b/src/crop_layer.c index 3f0011d4..cf1383ea 100644 --- a/src/crop_layer.c +++ b/src/crop_layer.c @@ -28,7 +28,7 @@ crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int return layer; } -void forward_crop_layer(const crop_layer layer, int train, float *input) +void forward_crop_layer(const crop_layer layer, network_state state) { int i,j,c,b,row,col; int index; @@ -36,7 +36,7 @@ void forward_crop_layer(const crop_layer layer, int train, float *input) int flip = (layer.flip && rand()%2); int dh = rand()%(layer.h - layer.crop_height + 1); int dw = rand()%(layer.w - layer.crop_width + 1); - if(!train){ + if(!state.train){ flip = 0; dh = (layer.h - layer.crop_height)/2; dw = (layer.w - layer.crop_width)/2; @@ -52,7 +52,7 @@ void forward_crop_layer(const crop_layer layer, int train, float *input) } row = i + dh; index = col+layer.w*(row+layer.h*(c + layer.c*b)); - layer.output[count++] = input[index]; + layer.output[count++] = state.input[index]; } } } diff --git a/src/crop_layer.h b/src/crop_layer.h index 0d2f03b7..05a511b3 100644 --- a/src/crop_layer.h +++ b/src/crop_layer.h @@ -2,6 +2,7 @@ #define CROP_LAYER_H #include "image.h" +#include "params.h" typedef struct { int batch; @@ -17,10 +18,10 @@ typedef struct { image get_crop_image(crop_layer layer); crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip); -void forward_crop_layer(const crop_layer layer, int train, float *input); +void forward_crop_layer(const crop_layer layer, network_state state); #ifdef GPU -void forward_crop_layer_gpu(crop_layer layer, int train, float *input); +void forward_crop_layer_gpu(crop_layer layer, network_state state); #endif #endif diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu index 628c7000..8c97f35a 100644 --- a/src/crop_layer_kernels.cu +++ b/src/crop_layer_kernels.cu @@ -24,12 +24,12 @@ __global__ void forward_crop_layer_kernel(float *input, int size, int c, int h, output[count] = input[index]; } -extern "C" void forward_crop_layer_gpu(crop_layer layer, int train, float *input) +extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state) { int flip = (layer.flip && rand()%2); int dh = rand()%(layer.h - layer.crop_height + 1); int dw = rand()%(layer.w - layer.crop_width + 1); - if(!train){ + if(!state.train){ flip = 0; dh = (layer.h - layer.crop_height)/2; dw = (layer.w - layer.crop_width)/2; @@ -39,7 +39,7 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, int train, float *input dim3 dimBlock(BLOCK, 1, 1); dim3 dimGrid((size-1)/BLOCK + 1, 1, 1); - forward_crop_layer_kernel<<>>(input, size, layer.c, layer.h, layer.w, + forward_crop_layer_kernel<<>>(state.input, size, layer.c, layer.h, layer.w, layer.crop_height, layer.crop_width, dh, dw, flip, layer.output_gpu); check_error(cudaPeekAtLastError()); } diff --git a/src/data.c b/src/data.c index a429476c..342edfa4 100644 --- a/src/data.c +++ b/src/data.c @@ -18,6 +18,7 @@ struct load_args{ int nw; int jitter; int classes; + int background; data *d; }; @@ -62,17 +63,62 @@ matrix load_image_paths(char **paths, int n, int h, int w) return X; } -void fill_truth_detection(char *path, float *truth, int classes, int height, int width, int num_height, int num_width, int dy, int dx, int jitter, int flip) +typedef struct box{ + int id; + float x,y,w,h; +} box; + +box *read_boxes(char *filename, int *n) +{ + box *boxes = calloc(1, sizeof(box)); + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + float x, y, h, w; + int id; + int count = 0; + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + boxes = realloc(boxes, (count+1)*sizeof(box)); + boxes[count].id = id; + boxes[count].x = x; + boxes[count].y = y; + boxes[count].h = h; + boxes[count].w = w; + ++count; + } + fclose(file); + *n = count; + return boxes; +} + +void randomize_boxes(box *b, int n) +{ + int i; + for(i = 0; i < n; ++i){ + box swap = b[i]; + int index = rand()%n; + b[i] = b[index]; + b[index] = swap; + } +} + +void fill_truth_detection(char *path, float *truth, int classes, int height, int width, int num_height, int num_width, int dy, int dx, int jitter, int flip, int background) { int box_height = height/num_height; int box_width = width/num_width; char *labelpath = find_replace(path, "VOC2012/JPEGImages", "labels"); labelpath = find_replace(labelpath, ".jpg", ".txt"); - FILE *file = fopen(labelpath, "r"); - if(!file) file_error(labelpath); + int count = 0; + box *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); float x, y, h, w; int id; - while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + int i, j; + for(i = 0; i < count; ++i){ + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; if(flip) x = 1-x; x *= width + jitter; y *= height + jitter; @@ -88,23 +134,24 @@ void fill_truth_detection(char *path, float *truth, int classes, int height, int float dw = (x - i*box_width)/box_width; float dh = (y - j*box_height)/box_height; - //printf("%d %d %d %f %f\n", id, i, j, dh, dw); - int index = (i+j*num_width)*(4+classes); - if(truth[index+classes]) continue; + + int index = (i+j*num_width)*(4+classes+background); + if(truth[index+classes+background]) continue; truth[index+id] = 1; - index += classes; + index += classes+background; truth[index++] = dh; truth[index++] = dw; truth[index++] = h*(height+jitter)/height; truth[index++] = w*(width+jitter)/width; } - int i, j; - for(i = 0; i < num_height*num_width*(4+classes); i += 4+classes){ - int background = 1; - for(j = i; j < i+classes; ++j) if (truth[j]) background = 0; - truth[i+classes-1] = background; + free(boxes); + if(background){ + for(i = 0; i < num_height*num_width*(4+classes+background); i += 4+classes+background){ + int object = 0; + for(j = i; j < i+classes; ++j) if (truth[j]) object = 1; + truth[i+classes] = !object; + } } - fclose(file); } #define NUMCHARS 37 @@ -218,20 +265,20 @@ void free_data(data d) } } -data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter) +data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background) { char **random_paths = get_random_paths(paths, n, m); int i; data d; d.shallow = 0; d.X = load_image_paths(random_paths, n, h, w); - int k = nh*nw*(4+classes); + int k = nh*nw*(4+classes+background); d.y = make_matrix(n, k); for(i = 0; i < n; ++i){ int dx = rand()%jitter; int dy = rand()%jitter; int flip = rand()%2; - fill_truth_detection(random_paths[i], d.y.vals[i], classes, h-jitter, w-jitter, nh, nw, dy, dx, jitter, flip); + fill_truth_detection(random_paths[i], d.y.vals[i], classes, h-jitter, w-jitter, nh, nw, dy, dx, jitter, flip, background); image a = float_to_image(h, w, 3, d.X.vals[i]); if(flip) flip_image(a); jitter_image(a,h-jitter,w-jitter,dy,dx); @@ -245,14 +292,14 @@ void *load_detection_thread(void *ptr) { printf("Loading data: %d\n", rand()); struct load_args a = *(struct load_args*)ptr; - *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.h, a.w, a.nh, a.nw, a.jitter); + *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.h, a.w, a.nh, a.nw, a.jitter, a.background); translate_data_rows(*a.d, -128); scale_data_rows(*a.d, 1./128); free(ptr); return 0; } -pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d) +pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background, data *d) { pthread_t thread; struct load_args *args = calloc(1, sizeof(struct load_args)); @@ -265,6 +312,7 @@ pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, in args->nw = nw; args->classes = classes; args->jitter = jitter; + args->background = background; args->d = d; if(pthread_create(&thread, 0, load_detection_thread, args)) { error("Thread creation failed"); diff --git a/src/data.h b/src/data.h index 0cae7f52..ec186277 100644 --- a/src/data.h +++ b/src/data.h @@ -20,8 +20,8 @@ data load_data_captcha_encode(char **paths, int n, int m, int h, int w); data load_data(char **paths, int n, int m, char **labels, int k, int h, int w); pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d); -pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d); -data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter); +pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background, data *d); +data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background); data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w); data load_cifar10_data(char *filename); diff --git a/src/deconvolutional_kernels.cu b/src/deconvolutional_kernels.cu index 1d05a809..aeab2c3f 100644 --- a/src/deconvolutional_kernels.cu +++ b/src/deconvolutional_kernels.cu @@ -9,7 +9,7 @@ extern "C" { #include "cuda.h" } -extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, float *in) +extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) { int i; int out_h = deconvolutional_out_height(layer); @@ -24,7 +24,7 @@ extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, f for(i = 0; i < layer.batch; ++i){ float *a = layer.filters_gpu; - float *b = in + i*layer.c*layer.h*layer.w; + float *b = state.input + i*layer.c*layer.h*layer.w; float *c = layer.col_image_gpu; gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n); @@ -34,7 +34,7 @@ extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, f activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation); } -extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, float *in, float *delta_gpu) +extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) { float alpha = 1./layer.batch; int out_h = deconvolutional_out_height(layer); @@ -45,14 +45,14 @@ extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu); backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size); - if(delta_gpu) memset(delta_gpu, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); + if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); for(i = 0; i < layer.batch; ++i){ int m = layer.c; int n = layer.size*layer.size*layer.n; int k = layer.h*layer.w; - float *a = in + i*m*n; + float *a = state.input + i*m*n; float *b = layer.col_image_gpu; float *c = layer.filter_updates_gpu; @@ -60,14 +60,14 @@ extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, layer.size, layer.stride, 0, b); gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n); - if(delta_gpu){ + if(state.delta){ int m = layer.c; int n = layer.h*layer.w; int k = layer.size*layer.size*layer.n; float *a = layer.filters_gpu; float *b = layer.col_image_gpu; - float *c = delta_gpu + i*n*m; + float *c = state.delta + i*n*m; gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); } @@ -90,15 +90,15 @@ extern "C" void push_deconvolutional_layer(deconvolutional_layer layer) cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); } -extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer) +extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay) { int size = layer.size*layer.size*layer.c*layer.n; - axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); - scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1); + axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); + scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); - axpy_ongpu(size, -layer.decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); - axpy_ongpu(size, layer.learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); - scal_ongpu(size, layer.momentum, layer.filter_updates_gpu, 1); + axpy_ongpu(size, -decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); + axpy_ongpu(size, learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); + scal_ongpu(size, momentum, layer.filter_updates_gpu, 1); } diff --git a/src/deconvolutional_layer.c b/src/deconvolutional_layer.c index d4a84267..83147b55 100644 --- a/src/deconvolutional_layer.c +++ b/src/deconvolutional_layer.c @@ -43,15 +43,11 @@ image get_deconvolutional_delta(deconvolutional_layer layer) return float_to_image(h,w,c,layer.delta); } -deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, float learning_rate, float momentum, float decay) +deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation) { int i; deconvolutional_layer *layer = calloc(1, sizeof(deconvolutional_layer)); - layer->learning_rate = learning_rate; - layer->momentum = momentum; - layer->decay = decay; - layer->h = h; layer->w = w; layer->c = c; @@ -120,7 +116,7 @@ void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w) #endif } -void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in) +void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state) { int i; int out_h = deconvolutional_out_height(layer); @@ -135,7 +131,7 @@ void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in) for(i = 0; i < layer.batch; ++i){ float *a = layer.filters; - float *b = in + i*layer.c*layer.h*layer.w; + float *b = state.input + i*layer.c*layer.h*layer.w; float *c = layer.col_image; gemm(1,0,m,n,k,1,a,m,b,n,0,c,n); @@ -145,7 +141,7 @@ void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in) activate_array(layer.output, layer.batch*layer.n*size, layer.activation); } -void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, float *delta) +void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state) { float alpha = 1./layer.batch; int out_h = deconvolutional_out_height(layer); @@ -156,14 +152,14 @@ void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, floa gradient_array(layer.output, size*layer.n*layer.batch, layer.activation, layer.delta); backward_bias(layer.bias_updates, layer.delta, layer.batch, layer.n, size); - if(delta) memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); + if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); for(i = 0; i < layer.batch; ++i){ int m = layer.c; int n = layer.size*layer.size*layer.n; int k = layer.h*layer.w; - float *a = in + i*m*n; + float *a = state.input + i*m*n; float *b = layer.col_image; float *c = layer.filter_updates; @@ -171,29 +167,29 @@ void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, floa layer.size, layer.stride, 0, b); gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n); - if(delta){ + if(state.delta){ int m = layer.c; int n = layer.h*layer.w; int k = layer.size*layer.size*layer.n; float *a = layer.filters; float *b = layer.col_image; - float *c = delta + i*n*m; + float *c = state.delta + i*n*m; gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); } } } -void update_deconvolutional_layer(deconvolutional_layer layer) +void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay) { int size = layer.size*layer.size*layer.c*layer.n; - axpy_cpu(layer.n, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1); - scal_cpu(layer.n, layer.momentum, layer.bias_updates, 1); + axpy_cpu(layer.n, learning_rate, layer.bias_updates, 1, layer.biases, 1); + scal_cpu(layer.n, momentum, layer.bias_updates, 1); - axpy_cpu(size, -layer.decay, layer.filters, 1, layer.filter_updates, 1); - axpy_cpu(size, layer.learning_rate, layer.filter_updates, 1, layer.filters, 1); - scal_cpu(size, layer.momentum, layer.filter_updates, 1); + axpy_cpu(size, -decay, layer.filters, 1, layer.filter_updates, 1); + axpy_cpu(size, learning_rate, layer.filter_updates, 1, layer.filters, 1); + scal_cpu(size, momentum, layer.filter_updates, 1); } diff --git a/src/deconvolutional_layer.h b/src/deconvolutional_layer.h index 1da43dca..0ece76f2 100644 --- a/src/deconvolutional_layer.h +++ b/src/deconvolutional_layer.h @@ -2,14 +2,11 @@ #define DECONVOLUTIONAL_LAYER_H #include "cuda.h" +#include "params.h" #include "image.h" #include "activations.h" typedef struct { - float learning_rate; - float momentum; - float decay; - int batch; int h,w,c; int n; @@ -41,18 +38,18 @@ typedef struct { } deconvolutional_layer; #ifdef GPU -void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, float * in); -void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, float * in, float * delta_gpu); -void update_deconvolutional_layer_gpu(deconvolutional_layer layer); +void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); +void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); +void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay); void push_deconvolutional_layer(deconvolutional_layer layer); void pull_deconvolutional_layer(deconvolutional_layer layer); #endif -deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, float learning_rate, float momentum, float decay); +deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation); void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w); -void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in); -void update_deconvolutional_layer(deconvolutional_layer layer); -void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, float *delta); +void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state); +void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay); +void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state); image get_deconvolutional_image(deconvolutional_layer layer); image get_deconvolutional_delta(deconvolutional_layer layer); diff --git a/src/detection.c b/src/detection.c index fa8b38c6..f8613478 100644 --- a/src/detection.c +++ b/src/detection.c @@ -61,15 +61,16 @@ void train_detection(char *cfgfile, char *weightfile) data train, buffer; int im_dim = 512; int jitter = 64; - int classes = 21; - pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, &buffer); + int classes = 20; + int background = 1; + pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, background, &buffer); clock_t time; while(1){ i += 1; time=clock(); pthread_join(load_thread, 0); train = buffer; - load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, &buffer); + load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, background, &buffer); /* image im = float_to_image(im_dim - jitter, im_dim-jitter, 3, train.X.vals[0]); @@ -103,10 +104,12 @@ void validate_detection(char *cfgfile, char *weightfile) srand(time(0)); list *plist = get_paths("/home/pjreddie/data/voc/val.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/train.txt"); char **paths = (char **)list_to_array(plist); - int num_output = 1225; int im_size = 448; - int classes = 21; + int classes = 20; + int background = 0; + int num_output = 7*7*(4+classes+background); int m = plist->size; int i = 0; @@ -130,26 +133,18 @@ void validate_detection(char *cfgfile, char *weightfile) matrix pred = network_predict_data(net, val); int j, k, class; for(j = 0; j < pred.rows; ++j){ - for(k = 0; k < pred.cols; k += classes+4){ - - /* - int z; - for(z = 0; z < 25; ++z) printf("%f, ", pred.vals[j][k+z]); - printf("\n"); - */ - - //if (pred.vals[j][k] > .001){ - for(class = 0; class < classes-1; ++class){ - int index = (k)/(classes+4); + for(k = 0; k < pred.cols; k += classes+4+background){ + for(class = 0; class < classes; ++class){ + int index = (k)/(classes+4+background); int r = index/7; int c = index%7; - float y = (r + pred.vals[j][k+0+classes])/7.; - float x = (c + pred.vals[j][k+1+classes])/7.; - float h = pred.vals[j][k+2+classes]; - float w = pred.vals[j][k+3+classes]; + int ci = k+classes+background; + float y = (r + pred.vals[j][ci + 0])/7.; + float x = (c + pred.vals[j][ci + 1])/7.; + float h = pred.vals[j][ci + 2]; + float w = pred.vals[j][ci + 3]; printf("%d %d %f %f %f %f %f\n", (i-1)*m/splits + j, class, pred.vals[j][k+class], y, x, h, w); } - //} } } diff --git a/src/detection_layer.c b/src/detection_layer.c index 68d151ac..5ca7fa2b 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -39,28 +39,52 @@ detection_layer *make_detection_layer(int batch, int inputs, int classes, int co return layer; } -void forward_detection_layer(const detection_layer layer, float *in, float *truth) + +void forward_detection_layer(const detection_layer layer, network_state state) { int in_i = 0; int out_i = 0; int locations = get_detection_layer_locations(layer); int i,j; for(i = 0; i < layer.batch*locations; ++i){ - int mask = (!truth || !truth[out_i + layer.classes - 1]); + int mask = (!state.truth || state.truth[out_i + layer.classes + 2]); float scale = 1; - if(layer.rescore) scale = in[in_i++]; + if(layer.rescore) scale = state.input[in_i++]; for(j = 0; j < layer.classes; ++j){ - layer.output[out_i++] = scale*in[in_i++]; + layer.output[out_i++] = scale*state.input[in_i++]; + } + if(!layer.rescore){ + softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes); + activate_array(state.input+in_i, layer.coords, LOGISTIC); } - softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes); - activate_array(in+in_i, layer.coords, LOGISTIC); for(j = 0; j < layer.coords; ++j){ - layer.output[out_i++] = mask*in[in_i++]; + layer.output[out_i++] = mask*state.input[in_i++]; } } } -void backward_detection_layer(const detection_layer layer, float *in, float *delta) +void dark_zone(detection_layer layer, int index, network_state state) +{ + int size = layer.classes+layer.rescore+layer.coords; + int location = (index%(7*7*size)) / size ; + int r = location / 7; + int c = location % 7; + int class = index%size; + if(layer.rescore) --class; + int dr, dc; + for(dr = -1; dr <= 1; ++dr){ + for(dc = -1; dc <= 1; ++dc){ + if(!(dr || dc)) continue; + if((r + dr) > 6 || (r + dr) < 0) continue; + if((c + dc) > 6 || (c + dc) < 0) continue; + int di = (dr*7 + dc) * size; + if(state.truth[index+di]) continue; + layer.delta[index + di] = 0; + } + } +} + +void backward_detection_layer(const detection_layer layer, network_state state) { int locations = get_detection_layer_locations(layer); int i,j; @@ -69,49 +93,68 @@ void backward_detection_layer(const detection_layer layer, float *in, float *del for(i = 0; i < layer.batch*locations; ++i){ float scale = 1; float latent_delta = 0; - if(layer.rescore) scale = in[in_i++]; + if(layer.rescore) scale = state.input[in_i++]; + if(!layer.rescore){ + for(j = 0; j < layer.classes-1; ++j){ + if(state.truth[out_i + j]) dark_zone(layer, out_i+j, state); + } + } for(j = 0; j < layer.classes; ++j){ - latent_delta += in[in_i]*layer.delta[out_i]; - delta[in_i++] = scale*layer.delta[out_i++]; + latent_delta += state.input[in_i]*layer.delta[out_i]; + state.delta[in_i++] = scale*layer.delta[out_i++]; } - - gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i); + + if (!layer.rescore) gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i); for(j = 0; j < layer.coords; ++j){ - delta[in_i++] = layer.delta[out_i++]; + state.delta[in_i++] = layer.delta[out_i++]; } - if(layer.rescore) delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta; + if(layer.rescore) state.delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta; } } #ifdef GPU -void forward_detection_layer_gpu(const detection_layer layer, float *in, float *truth) +void forward_detection_layer_gpu(const detection_layer layer, network_state state) { int outputs = get_detection_layer_output_size(layer); float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float)); float *truth_cpu = 0; - if(truth){ + if(state.truth){ truth_cpu = calloc(layer.batch*outputs, sizeof(float)); - cuda_pull_array(truth, truth_cpu, layer.batch*outputs); + cuda_pull_array(state.truth, truth_cpu, layer.batch*outputs); } - cuda_pull_array(in, in_cpu, layer.batch*layer.inputs); - forward_detection_layer(layer, in_cpu, truth_cpu); + cuda_pull_array(state.input, in_cpu, layer.batch*layer.inputs); + network_state cpu_state; + cpu_state.train = state.train; + cpu_state.truth = truth_cpu; + cpu_state.input = in_cpu; + forward_detection_layer(layer, cpu_state); cuda_push_array(layer.output_gpu, layer.output, layer.batch*outputs); - free(in_cpu); - if(truth_cpu) free(truth_cpu); + free(cpu_state.input); + if(cpu_state.truth) free(cpu_state.truth); } -void backward_detection_layer_gpu(detection_layer layer, float *in, float *delta) +void backward_detection_layer_gpu(detection_layer layer, network_state state) { int outputs = get_detection_layer_output_size(layer); float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float)); float *delta_cpu = calloc(layer.batch*layer.inputs, sizeof(float)); + float *truth_cpu = 0; + if(state.truth){ + truth_cpu = calloc(layer.batch*outputs, sizeof(float)); + cuda_pull_array(state.truth, truth_cpu, layer.batch*outputs); + } + network_state cpu_state; + cpu_state.train = state.train; + cpu_state.input = in_cpu; + cpu_state.truth = truth_cpu; + cpu_state.delta = delta_cpu; - cuda_pull_array(in, in_cpu, layer.batch*layer.inputs); + cuda_pull_array(state.input, in_cpu, layer.batch*layer.inputs); cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*outputs); - backward_detection_layer(layer, in_cpu, delta_cpu); - cuda_push_array(delta, delta_cpu, layer.batch*layer.inputs); + backward_detection_layer(layer, cpu_state); + cuda_push_array(state.delta, delta_cpu, layer.batch*layer.inputs); free(in_cpu); free(delta_cpu); diff --git a/src/detection_layer.h b/src/detection_layer.h index e7e9e206..69a83a73 100644 --- a/src/detection_layer.h +++ b/src/detection_layer.h @@ -1,6 +1,8 @@ #ifndef DETECTION_LAYER_H #define DETECTION_LAYER_H +#include "params.h" + typedef struct { int batch; int inputs; @@ -16,13 +18,13 @@ typedef struct { } detection_layer; detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore); -void forward_detection_layer(const detection_layer layer, float *in, float *truth); -void backward_detection_layer(const detection_layer layer, float *in, float *delta); +void forward_detection_layer(const detection_layer layer, network_state state); +void backward_detection_layer(const detection_layer layer, network_state state); int get_detection_layer_output_size(detection_layer layer); #ifdef GPU -void forward_detection_layer_gpu(const detection_layer layer, float *in, float *truth); -void backward_detection_layer_gpu(detection_layer layer, float *in, float *delta); +void forward_detection_layer_gpu(const detection_layer layer, network_state state); +void backward_detection_layer_gpu(detection_layer layer, network_state state); #endif #endif diff --git a/src/dropout_layer.c b/src/dropout_layer.c index 32a34089..7fbf8ff2 100644 --- a/src/dropout_layer.c +++ b/src/dropout_layer.c @@ -1,4 +1,5 @@ #include "dropout_layer.h" +#include "params.h" #include "utils.h" #include "cuda.h" #include @@ -11,11 +12,9 @@ dropout_layer *make_dropout_layer(int batch, int inputs, float probability) layer->probability = probability; layer->inputs = inputs; layer->batch = batch; - layer->output = calloc(inputs*batch, sizeof(float)); layer->rand = calloc(inputs*batch, sizeof(float)); layer->scale = 1./(1.-probability); #ifdef GPU - layer->output_gpu = cuda_make_array(layer->output, inputs*batch); layer->rand_gpu = cuda_make_array(layer->rand, inputs*batch); #endif return layer; @@ -23,36 +22,34 @@ dropout_layer *make_dropout_layer(int batch, int inputs, float probability) void resize_dropout_layer(dropout_layer *layer, int inputs) { - layer->output = realloc(layer->output, layer->inputs*layer->batch*sizeof(float)); layer->rand = realloc(layer->rand, layer->inputs*layer->batch*sizeof(float)); #ifdef GPU - cuda_free(layer->output_gpu); cuda_free(layer->rand_gpu); - layer->output_gpu = cuda_make_array(layer->output, inputs*layer->batch); layer->rand_gpu = cuda_make_array(layer->rand, inputs*layer->batch); #endif } -void forward_dropout_layer(dropout_layer layer, float *input) +void forward_dropout_layer(dropout_layer layer, network_state state) { int i; + if (!state.train) return; for(i = 0; i < layer.batch * layer.inputs; ++i){ float r = rand_uniform(); layer.rand[i] = r; - if(r < layer.probability) layer.output[i] = 0; - else layer.output[i] = input[i]*layer.scale; + if(r < layer.probability) state.input[i] = 0; + else state.input[i] *= layer.scale; } } -void backward_dropout_layer(dropout_layer layer, float *delta) +void backward_dropout_layer(dropout_layer layer, network_state state) { int i; - if(!delta) return; + if(!state.delta) return; for(i = 0; i < layer.batch * layer.inputs; ++i){ float r = layer.rand[i]; - if(r < layer.probability) delta[i] = 0; - else delta[i] *= layer.scale; + if(r < layer.probability) state.delta[i] = 0; + else state.delta[i] *= layer.scale; } } diff --git a/src/dropout_layer.h b/src/dropout_layer.h index 051ce472..d12d4a18 100644 --- a/src/dropout_layer.h +++ b/src/dropout_layer.h @@ -1,5 +1,6 @@ #ifndef DROPOUT_LAYER_H #define DROPOUT_LAYER_H +#include "params.h" typedef struct{ int batch; @@ -7,22 +8,20 @@ typedef struct{ float probability; float scale; float *rand; - float *output; #ifdef GPU float * rand_gpu; - float * output_gpu; #endif } dropout_layer; dropout_layer *make_dropout_layer(int batch, int inputs, float probability); -void forward_dropout_layer(dropout_layer layer, float *input); -void backward_dropout_layer(dropout_layer layer, float *delta); +void forward_dropout_layer(dropout_layer layer, network_state state); +void backward_dropout_layer(dropout_layer layer, network_state state); void resize_dropout_layer(dropout_layer *layer, int inputs); #ifdef GPU -void forward_dropout_layer_gpu(dropout_layer layer, float * input); -void backward_dropout_layer_gpu(dropout_layer layer, float * delta); +void forward_dropout_layer_gpu(dropout_layer layer, network_state state); +void backward_dropout_layer_gpu(dropout_layer layer, network_state state); #endif #endif diff --git a/src/dropout_layer_kernels.cu b/src/dropout_layer_kernels.cu index 371f0dc4..94f61ab0 100644 --- a/src/dropout_layer_kernels.cu +++ b/src/dropout_layer_kernels.cu @@ -2,32 +2,32 @@ extern "C" { #include "dropout_layer.h" #include "cuda.h" #include "utils.h" +#include "params.h" } -__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale, float *output) +__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) { int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id < size) output[id] = (rand[id] < prob) ? 0 : input[id]*scale; + if(id < size) input[id] = (rand[id] < prob) ? 0 : input[id]*scale; } -extern "C" void forward_dropout_layer_gpu(dropout_layer layer, float * input) +extern "C" void forward_dropout_layer_gpu(dropout_layer layer, network_state state) { + if (!state.train) return; int j; int size = layer.inputs*layer.batch; for(j = 0; j < size; ++j) layer.rand[j] = rand_uniform(); cuda_push_array(layer.rand_gpu, layer.rand, layer.inputs*layer.batch); - yoloswag420blazeit360noscope<<>>(input, size, layer.rand_gpu, layer.probability, - layer.scale, layer.output_gpu); + yoloswag420blazeit360noscope<<>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale); check_error(cudaPeekAtLastError()); } -extern "C" void backward_dropout_layer_gpu(dropout_layer layer, float *delta) +extern "C" void backward_dropout_layer_gpu(dropout_layer layer, network_state state) { - if(!delta) return; + if(!state.delta) return; int size = layer.inputs*layer.batch; - yoloswag420blazeit360noscope<<>>(delta, size, layer.rand_gpu, layer.probability, - layer.scale, delta); + yoloswag420blazeit360noscope<<>>(state.delta, size, layer.rand_gpu, layer.probability, layer.scale); check_error(cudaPeekAtLastError()); } diff --git a/src/freeweight_layer.c b/src/freeweight_layer.c deleted file mode 100644 index b4c02dbb..00000000 --- a/src/freeweight_layer.c +++ /dev/null @@ -1,25 +0,0 @@ -#include "freeweight_layer.h" -#include "stdlib.h" -#include "stdio.h" - -freeweight_layer *make_freeweight_layer(int batch, int inputs) -{ - fprintf(stderr, "Freeweight Layer: %d inputs\n", inputs); - freeweight_layer *layer = calloc(1, sizeof(freeweight_layer)); - layer->inputs = inputs; - layer->batch = batch; - return layer; -} - -void forward_freeweight_layer(freeweight_layer layer, float *input) -{ - int i; - for(i = 0; i < layer.batch * layer.inputs; ++i){ - input[i] *= 2.*((float)rand()/RAND_MAX); - } -} - -void backward_freeweight_layer(freeweight_layer layer, float *input, float *delta) -{ - // Don't do shit LULZ -} diff --git a/src/freeweight_layer.h b/src/freeweight_layer.h deleted file mode 100644 index bfca2c19..00000000 --- a/src/freeweight_layer.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef FREEWEIGHT_LAYER_H -#define FREEWEIGHT_LAYER_H - -typedef struct{ - int batch; - int inputs; -} freeweight_layer; - -freeweight_layer *make_freeweight_layer(int batch, int inputs); - -void forward_freeweight_layer(freeweight_layer layer, float *input); -void backward_freeweight_layer(freeweight_layer layer, float *input, float *delta); - -#endif diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index ef7176d9..790cb287 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -58,7 +58,7 @@ void resize_maxpool_layer(maxpool_layer *layer, int h, int w) #endif } -void forward_maxpool_layer(const maxpool_layer layer, float *input) +void forward_maxpool_layer(const maxpool_layer layer, network_state state) { int b,i,j,k,l,m; int w_offset = (-layer.size-1)/2 + 1; @@ -82,7 +82,7 @@ void forward_maxpool_layer(const maxpool_layer layer, float *input) int index = cur_w + layer.w*(cur_h + layer.h*(k + b*layer.c)); int valid = (cur_h >= 0 && cur_h < layer.h && cur_w >= 0 && cur_w < layer.w); - float val = (valid != 0) ? input[index] : -FLT_MAX; + float val = (valid != 0) ? state.input[index] : -FLT_MAX; max_i = (val > max) ? index : max_i; max = (val > max) ? val : max; } @@ -95,16 +95,16 @@ void forward_maxpool_layer(const maxpool_layer layer, float *input) } } -void backward_maxpool_layer(const maxpool_layer layer, float *delta) +void backward_maxpool_layer(const maxpool_layer layer, network_state state) { int i; int h = (layer.h-1)/layer.stride + 1; int w = (layer.w-1)/layer.stride + 1; int c = layer.c; - memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); + memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); for(i = 0; i < h*w*c*layer.batch; ++i){ int index = layer.indexes[i]; - delta[index] += layer.delta[i]; + state.delta[index] += layer.delta[i]; } } diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index 89fb2456..cbd6a767 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -2,6 +2,7 @@ #define MAXPOOL_LAYER_H #include "image.h" +#include "params.h" #include "cuda.h" typedef struct { @@ -22,12 +23,12 @@ typedef struct { image get_maxpool_image(maxpool_layer layer); maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int size, int stride); void resize_maxpool_layer(maxpool_layer *layer, int h, int w); -void forward_maxpool_layer(const maxpool_layer layer, float *input); -void backward_maxpool_layer(const maxpool_layer layer, float *delta); +void forward_maxpool_layer(const maxpool_layer layer, network_state state); +void backward_maxpool_layer(const maxpool_layer layer, network_state state); #ifdef GPU -void forward_maxpool_layer_gpu(maxpool_layer layer, float * input); -void backward_maxpool_layer_gpu(maxpool_layer layer, float * delta); +void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state); +void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state); #endif #endif diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index a5c82096..6c633a97 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -80,7 +80,7 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_ prev_delta[index] = d; } -extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, float *input) +extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state) { int h = (layer.h-1)/layer.stride + 1; int w = (layer.w-1)/layer.stride + 1; @@ -88,15 +88,15 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, float *input) size_t n = h*w*c*layer.batch; - forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, input, layer.output_gpu, layer.indexes_gpu); + forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu); check_error(cudaPeekAtLastError()); } -extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, float * delta) +extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state) { size_t n = layer.h*layer.w*layer.c*layer.batch; - backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, delta, layer.indexes_gpu); + backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu); check_error(cudaPeekAtLastError()); } diff --git a/src/network.c b/src/network.c index b60f0595..89c5621f 100644 --- a/src/network.c +++ b/src/network.c @@ -4,6 +4,7 @@ #include "image.h" #include "data.h" #include "utils.h" +#include "params.h" #include "crop_layer.h" #include "connected_layer.h" @@ -13,7 +14,6 @@ #include "maxpool_layer.h" #include "cost_layer.h" #include "normalization_layer.h" -#include "freeweight_layer.h" #include "softmax_layer.h" #include "dropout_layer.h" @@ -36,8 +36,6 @@ char *get_layer_string(LAYER_TYPE a) return "normalization"; case DROPOUT: return "dropout"; - case FREEWEIGHT: - return "freeweight"; case CROP: return "crop"; case COST: @@ -48,16 +46,18 @@ char *get_layer_string(LAYER_TYPE a) return "none"; } -network make_network(int n, int batch) +network make_network(int n) { network net; net.n = n; - net.batch = batch; net.layers = calloc(net.n, sizeof(void *)); net.types = calloc(net.n, sizeof(LAYER_TYPE)); net.outputs = 0; net.output = 0; net.seen = 0; + net.batch = 0; + net.inputs = 0; + net.h = net.w = net.c = 0; #ifdef GPU net.input_gpu = calloc(1, sizeof(float *)); net.truth_gpu = calloc(1, sizeof(float *)); @@ -65,68 +65,41 @@ network make_network(int n, int batch) return net; } -void forward_network(network net, float *input, float *truth, int train) +void forward_network(network net, network_state state) { int i; for(i = 0; i < net.n; ++i){ if(net.types[i] == CONVOLUTIONAL){ - convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - forward_convolutional_layer(layer, input); - input = layer.output; + forward_convolutional_layer(*(convolutional_layer *)net.layers[i], state); } else if(net.types[i] == DECONVOLUTIONAL){ - deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; - forward_deconvolutional_layer(layer, input); - input = layer.output; + forward_deconvolutional_layer(*(deconvolutional_layer *)net.layers[i], state); } else if(net.types[i] == DETECTION){ - detection_layer layer = *(detection_layer *)net.layers[i]; - forward_detection_layer(layer, input, truth); - input = layer.output; + forward_detection_layer(*(detection_layer *)net.layers[i], state); } else if(net.types[i] == CONNECTED){ - connected_layer layer = *(connected_layer *)net.layers[i]; - forward_connected_layer(layer, input); - input = layer.output; + forward_connected_layer(*(connected_layer *)net.layers[i], state); } else if(net.types[i] == CROP){ - crop_layer layer = *(crop_layer *)net.layers[i]; - forward_crop_layer(layer, train, input); - input = layer.output; + forward_crop_layer(*(crop_layer *)net.layers[i], state); } else if(net.types[i] == COST){ - cost_layer layer = *(cost_layer *)net.layers[i]; - forward_cost_layer(layer, input, truth); + forward_cost_layer(*(cost_layer *)net.layers[i], state); } else if(net.types[i] == SOFTMAX){ - softmax_layer layer = *(softmax_layer *)net.layers[i]; - forward_softmax_layer(layer, input); - input = layer.output; + forward_softmax_layer(*(softmax_layer *)net.layers[i], state); } else if(net.types[i] == MAXPOOL){ - maxpool_layer layer = *(maxpool_layer *)net.layers[i]; - forward_maxpool_layer(layer, input); - input = layer.output; + forward_maxpool_layer(*(maxpool_layer *)net.layers[i], state); } else if(net.types[i] == NORMALIZATION){ - normalization_layer layer = *(normalization_layer *)net.layers[i]; - forward_normalization_layer(layer, input); - input = layer.output; + forward_normalization_layer(*(normalization_layer *)net.layers[i], state); } else if(net.types[i] == DROPOUT){ - if(!train) continue; - dropout_layer layer = *(dropout_layer *)net.layers[i]; - forward_dropout_layer(layer, input); - input = layer.output; + forward_dropout_layer(*(dropout_layer *)net.layers[i], state); } - else if(net.types[i] == FREEWEIGHT){ - if(!train) continue; - //freeweight_layer layer = *(freeweight_layer *)net.layers[i]; - //forward_freeweight_layer(layer, input); - } - //char buff[256]; - //sprintf(buff, "layer %d", i); - //cuda_compare(get_network_output_gpu_layer(net, i), input, get_network_output_size_layer(net, i)*net.batch, buff); + state.input = get_network_output_layer(net, i); } } @@ -136,15 +109,15 @@ void update_network(network net) for(i = 0; i < net.n; ++i){ if(net.types[i] == CONVOLUTIONAL){ convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - update_convolutional_layer(layer); + update_convolutional_layer(layer, net.learning_rate, net.momentum, net.decay); } else if(net.types[i] == DECONVOLUTIONAL){ deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; - update_deconvolutional_layer(layer); + update_deconvolutional_layer(layer, net.learning_rate, net.momentum, net.decay); } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; - update_connected_layer(layer); + update_connected_layer(layer, net.learning_rate, net.momentum, net.decay); } } } @@ -152,37 +125,27 @@ void update_network(network net) float *get_network_output_layer(network net, int i) { if(net.types[i] == CONVOLUTIONAL){ - convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - return layer.output; + return ((convolutional_layer *)net.layers[i]) -> output; } else if(net.types[i] == DECONVOLUTIONAL){ - deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; - return layer.output; + return ((deconvolutional_layer *)net.layers[i]) -> output; } else if(net.types[i] == MAXPOOL){ - maxpool_layer layer = *(maxpool_layer *)net.layers[i]; - return layer.output; + return ((maxpool_layer *)net.layers[i]) -> output; } else if(net.types[i] == DETECTION){ - detection_layer layer = *(detection_layer *)net.layers[i]; - return layer.output; + return ((detection_layer *)net.layers[i]) -> output; } else if(net.types[i] == SOFTMAX){ - softmax_layer layer = *(softmax_layer *)net.layers[i]; - return layer.output; + return ((softmax_layer *)net.layers[i]) -> output; } else if(net.types[i] == DROPOUT){ - dropout_layer layer = *(dropout_layer *)net.layers[i]; - return layer.output; - } else if(net.types[i] == FREEWEIGHT){ return get_network_output_layer(net, i-1); } else if(net.types[i] == CONNECTED){ - connected_layer layer = *(connected_layer *)net.layers[i]; - return layer.output; + return ((connected_layer *)net.layers[i]) -> output; } else if(net.types[i] == CROP){ - crop_layer layer = *(crop_layer *)net.layers[i]; - return layer.output; + return ((crop_layer *)net.layers[i]) -> output; } else if(net.types[i] == NORMALIZATION){ - normalization_layer layer = *(normalization_layer *)net.layers[i]; - return layer.output; + return ((normalization_layer *)net.layers[i]) -> output; } return 0; } + float *get_network_output(network net) { int i; @@ -210,8 +173,6 @@ float *get_network_delta_layer(network net, int i) } else if(net.types[i] == DROPOUT){ if(i == 0) return 0; return get_network_delta_layer(net, i-1); - } else if(net.types[i] == FREEWEIGHT){ - return get_network_delta_layer(net, i-1); } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; return layer.delta; @@ -257,54 +218,53 @@ int get_predicted_class_network(network net) return max_index(out, k); } -void backward_network(network net, float *input, float *truth) +void backward_network(network net, network_state state) { int i; - float *prev_input; - float *prev_delta; + float *original_input = state.input; for(i = net.n-1; i >= 0; --i){ if(i == 0){ - prev_input = input; - prev_delta = 0; + state.input = original_input; + state.delta = 0; }else{ - prev_input = get_network_output_layer(net, i-1); - prev_delta = get_network_delta_layer(net, i-1); + state.input = get_network_output_layer(net, i-1); + state.delta = get_network_delta_layer(net, i-1); } if(net.types[i] == CONVOLUTIONAL){ convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - backward_convolutional_layer(layer, prev_input, prev_delta); + backward_convolutional_layer(layer, state); } else if(net.types[i] == DECONVOLUTIONAL){ deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; - backward_deconvolutional_layer(layer, prev_input, prev_delta); + backward_deconvolutional_layer(layer, state); } else if(net.types[i] == MAXPOOL){ maxpool_layer layer = *(maxpool_layer *)net.layers[i]; - if(i != 0) backward_maxpool_layer(layer, prev_delta); + if(i != 0) backward_maxpool_layer(layer, state); } else if(net.types[i] == DROPOUT){ dropout_layer layer = *(dropout_layer *)net.layers[i]; - backward_dropout_layer(layer, prev_delta); + backward_dropout_layer(layer, state); } else if(net.types[i] == DETECTION){ detection_layer layer = *(detection_layer *)net.layers[i]; - backward_detection_layer(layer, prev_input, prev_delta); + backward_detection_layer(layer, state); } else if(net.types[i] == NORMALIZATION){ normalization_layer layer = *(normalization_layer *)net.layers[i]; - if(i != 0) backward_normalization_layer(layer, prev_input, prev_delta); + if(i != 0) backward_normalization_layer(layer, state); } else if(net.types[i] == SOFTMAX){ softmax_layer layer = *(softmax_layer *)net.layers[i]; - if(i != 0) backward_softmax_layer(layer, prev_delta); + if(i != 0) backward_softmax_layer(layer, state); } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; - backward_connected_layer(layer, prev_input, prev_delta); + backward_connected_layer(layer, state); } else if(net.types[i] == COST){ cost_layer layer = *(cost_layer *)net.layers[i]; - backward_cost_layer(layer, prev_input, prev_delta); + backward_cost_layer(layer, state); } } } @@ -314,8 +274,12 @@ float train_network_datum(network net, float *x, float *y) #ifdef GPU if(gpu_index >= 0) return train_network_datum_gpu(net, x, y); #endif - forward_network(net, x, y, 1); - backward_network(net, x, y); + network_state state; + state.input = x; + state.truth = y; + state.train = 1; + forward_network(net, state); + backward_network(net, state); float error = get_network_cost(net); update_network(net); return error; @@ -361,15 +325,17 @@ float train_network(network net, data d) float train_network_batch(network net, data d, int n) { int i,j; + network_state state; + state.train = 1; float sum = 0; int batch = 2; for(i = 0; i < n; ++i){ for(j = 0; j < batch; ++j){ int index = rand()%d.X.rows; - float *x = d.X.vals[index]; - float *y = d.y.vals[index]; - forward_network(net, x, y, 1); - backward_network(net, x, y); + state.input = d.X.vals[index]; + state.truth = d.y.vals[index]; + forward_network(net, state); + backward_network(net, state); sum += get_network_cost(net); } update_network(net); @@ -377,28 +343,6 @@ float train_network_batch(network net, data d, int n) return (float)sum/(n*batch); } -void set_learning_network(network *net, float rate, float momentum, float decay) -{ - int i; - net->learning_rate=rate; - net->momentum = momentum; - net->decay = decay; - for(i = 0; i < net->n; ++i){ - if(net->types[i] == CONVOLUTIONAL){ - convolutional_layer *layer = (convolutional_layer *)net->layers[i]; - layer->learning_rate=rate; - layer->momentum = momentum; - layer->decay = decay; - } - else if(net->types[i] == CONNECTED){ - connected_layer *layer = (connected_layer *)net->layers[i]; - layer->learning_rate=rate; - layer->momentum = momentum; - layer->decay = decay; - } - } -} - void set_batch_network(network *net, int b) { net->batch = b; @@ -425,10 +369,6 @@ void set_batch_network(network *net, int b) detection_layer *layer = (detection_layer *) net->layers[i]; layer->batch = b; } - else if(net->types[i] == FREEWEIGHT){ - freeweight_layer *layer = (freeweight_layer *) net->layers[i]; - layer->batch = b; - } else if(net->types[i] == SOFTMAX){ softmax_layer *layer = (softmax_layer *)net->layers[i]; layer->batch = b; @@ -472,15 +412,11 @@ int get_network_input_size_layer(network net, int i) crop_layer layer = *(crop_layer *) net.layers[i]; return layer.c*layer.h*layer.w; } - else if(net.types[i] == FREEWEIGHT){ - freeweight_layer layer = *(freeweight_layer *) net.layers[i]; - return layer.inputs; - } else if(net.types[i] == SOFTMAX){ softmax_layer layer = *(softmax_layer *)net.layers[i]; return layer.inputs; } - printf("Can't find input size\n"); + fprintf(stderr, "Can't find input size\n"); return 0; } @@ -505,7 +441,7 @@ int get_network_output_size_layer(network net, int i) image output = get_maxpool_image(layer); return output.h*output.w*output.c; } - else if(net.types[i] == CROP){ + else if(net.types[i] == CROP){ crop_layer layer = *(crop_layer *) net.layers[i]; return layer.c*layer.crop_height*layer.crop_width; } @@ -517,15 +453,11 @@ int get_network_output_size_layer(network net, int i) dropout_layer layer = *(dropout_layer *) net.layers[i]; return layer.inputs; } - else if(net.types[i] == FREEWEIGHT){ - freeweight_layer layer = *(freeweight_layer *) net.layers[i]; - return layer.inputs; - } else if(net.types[i] == SOFTMAX){ softmax_layer layer = *(softmax_layer *)net.layers[i]; return layer.inputs; } - printf("Can't find output size\n"); + fprintf(stderr, "Can't find output size\n"); return 0; } @@ -650,11 +582,16 @@ void top_predictions(network net, int k, int *index) float *network_predict(network net, float *input) { - #ifdef GPU +#ifdef GPU if(gpu_index >= 0) return network_predict_gpu(net, input); - #endif +#endif - forward_network(net, input, 0, 0); + network_state state; + state.input = input; + state.truth = 0; + state.train = 0; + state.delta = 0; + forward_network(net, state); float *out = get_network_output(net); return out; } diff --git a/src/network.h b/src/network.h index d2fb346c..9099b244 100644 --- a/src/network.h +++ b/src/network.h @@ -3,6 +3,7 @@ #define NETWORK_H #include "image.h" +#include "params.h" #include "data.h" typedef enum { @@ -14,7 +15,6 @@ typedef enum { DETECTION, NORMALIZATION, DROPOUT, - FREEWEIGHT, CROP, COST } LAYER_TYPE; @@ -31,6 +31,9 @@ typedef struct { int outputs; float *output; + int inputs; + int h, w, c; + #ifdef GPU float **input_gpu; float **truth_gpu; @@ -47,9 +50,9 @@ float * get_network_delta_gpu_layer(network net, int i); void compare_networks(network n1, network n2, data d); char *get_layer_string(LAYER_TYPE a); -network make_network(int n, int batch); -void forward_network(network net, float *input, float *truth, int train); -void backward_network(network net, float *input, float *truth); +network make_network(int n); +void forward_network(network net, network_state state); +void backward_network(network net, network_state state); void update_network(network net); float train_network(network net, data d); @@ -75,7 +78,6 @@ void print_network(network net); void visualize_network(network net); int resize_network(network net, int h, int w, int c); void set_batch_network(network *net, int b); -void set_learning_network(network *net, float rate, float momentum, float decay); int get_network_input_size(network net); float get_network_cost(network net); diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 928c7f95..acc31d7c 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -6,6 +6,7 @@ extern "C" { #include "image.h" #include "data.h" #include "utils.h" +#include "params.h" #include "crop_layer.h" #include "connected_layer.h" @@ -15,7 +16,6 @@ extern "C" { #include "maxpool_layer.h" #include "cost_layer.h" #include "normalization_layer.h" -#include "freeweight_layer.h" #include "softmax_layer.h" #include "dropout_layer.h" } @@ -24,108 +24,78 @@ extern "C" float * get_network_output_gpu_layer(network net, int i); extern "C" float * get_network_delta_gpu_layer(network net, int i); float *get_network_output_gpu(network net); -void forward_network_gpu(network net, float * input, float * truth, int train) +void forward_network_gpu(network net, network_state state) { int i; for(i = 0; i < net.n; ++i){ - //clock_t time = clock(); if(net.types[i] == CONVOLUTIONAL){ - convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - forward_convolutional_layer_gpu(layer, input); - input = layer.output_gpu; + forward_convolutional_layer_gpu(*(convolutional_layer *)net.layers[i], state); } else if(net.types[i] == DECONVOLUTIONAL){ - deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; - forward_deconvolutional_layer_gpu(layer, input); - input = layer.output_gpu; + forward_deconvolutional_layer_gpu(*(deconvolutional_layer *)net.layers[i], state); } else if(net.types[i] == COST){ - cost_layer layer = *(cost_layer *)net.layers[i]; - forward_cost_layer_gpu(layer, input, truth); + forward_cost_layer_gpu(*(cost_layer *)net.layers[i], state); } else if(net.types[i] == CONNECTED){ - connected_layer layer = *(connected_layer *)net.layers[i]; - forward_connected_layer_gpu(layer, input); - input = layer.output_gpu; + forward_connected_layer_gpu(*(connected_layer *)net.layers[i], state); } else if(net.types[i] == DETECTION){ - detection_layer layer = *(detection_layer *)net.layers[i]; - forward_detection_layer_gpu(layer, input, truth); - input = layer.output_gpu; + forward_detection_layer_gpu(*(detection_layer *)net.layers[i], state); } else if(net.types[i] == MAXPOOL){ - maxpool_layer layer = *(maxpool_layer *)net.layers[i]; - forward_maxpool_layer_gpu(layer, input); - input = layer.output_gpu; + forward_maxpool_layer_gpu(*(maxpool_layer *)net.layers[i], state); } else if(net.types[i] == SOFTMAX){ - softmax_layer layer = *(softmax_layer *)net.layers[i]; - forward_softmax_layer_gpu(layer, input); - input = layer.output_gpu; + forward_softmax_layer_gpu(*(softmax_layer *)net.layers[i], state); } else if(net.types[i] == DROPOUT){ - if(!train) continue; - dropout_layer layer = *(dropout_layer *)net.layers[i]; - forward_dropout_layer_gpu(layer, input); - input = layer.output_gpu; + forward_dropout_layer_gpu(*(dropout_layer *)net.layers[i], state); } else if(net.types[i] == CROP){ - crop_layer layer = *(crop_layer *)net.layers[i]; - forward_crop_layer_gpu(layer, train, input); - input = layer.output_gpu; + forward_crop_layer_gpu(*(crop_layer *)net.layers[i], state); } - //cudaDeviceSynchronize(); - //printf("Forward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time)); + state.input = get_network_output_gpu_layer(net, i); } } -void backward_network_gpu(network net, float * input, float *truth) +void backward_network_gpu(network net, network_state state) { int i; - float * prev_input; - float * prev_delta; + float * original_input = state.input; for(i = net.n-1; i >= 0; --i){ //clock_t time = clock(); if(i == 0){ - prev_input = input; - prev_delta = 0; + state.input = original_input; + state.delta = 0; }else{ - prev_input = get_network_output_gpu_layer(net, i-1); - prev_delta = get_network_delta_gpu_layer(net, i-1); + state.input = get_network_output_gpu_layer(net, i-1); + state.delta = get_network_delta_gpu_layer(net, i-1); } if(net.types[i] == CONVOLUTIONAL){ - convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - backward_convolutional_layer_gpu(layer, prev_input, prev_delta); + backward_convolutional_layer_gpu(*(convolutional_layer *)net.layers[i], state); } else if(net.types[i] == DECONVOLUTIONAL){ - deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; - backward_deconvolutional_layer_gpu(layer, prev_input, prev_delta); + backward_deconvolutional_layer_gpu(*(deconvolutional_layer *)net.layers[i], state); } else if(net.types[i] == COST){ - cost_layer layer = *(cost_layer *)net.layers[i]; - backward_cost_layer_gpu(layer, prev_input, prev_delta); + backward_cost_layer_gpu(*(cost_layer *)net.layers[i], state); } else if(net.types[i] == CONNECTED){ - connected_layer layer = *(connected_layer *)net.layers[i]; - backward_connected_layer_gpu(layer, prev_input, prev_delta); + backward_connected_layer_gpu(*(connected_layer *)net.layers[i], state); } else if(net.types[i] == DETECTION){ - detection_layer layer = *(detection_layer *)net.layers[i]; - backward_detection_layer_gpu(layer, prev_input, prev_delta); + backward_detection_layer_gpu(*(detection_layer *)net.layers[i], state); } else if(net.types[i] == MAXPOOL){ - maxpool_layer layer = *(maxpool_layer *)net.layers[i]; - backward_maxpool_layer_gpu(layer, prev_delta); + backward_maxpool_layer_gpu(*(maxpool_layer *)net.layers[i], state); } else if(net.types[i] == DROPOUT){ - dropout_layer layer = *(dropout_layer *)net.layers[i]; - backward_dropout_layer_gpu(layer, prev_delta); + backward_dropout_layer_gpu(*(dropout_layer *)net.layers[i], state); } else if(net.types[i] == SOFTMAX){ - softmax_layer layer = *(softmax_layer *)net.layers[i]; - backward_softmax_layer_gpu(layer, prev_delta); + backward_softmax_layer_gpu(*(softmax_layer *)net.layers[i], state); } - //printf("Backward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time)); } } @@ -135,15 +105,15 @@ void update_network_gpu(network net) for(i = 0; i < net.n; ++i){ if(net.types[i] == CONVOLUTIONAL){ convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - update_convolutional_layer_gpu(layer); + update_convolutional_layer_gpu(layer, net.learning_rate, net.momentum, net.decay); } else if(net.types[i] == DECONVOLUTIONAL){ deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; - update_deconvolutional_layer_gpu(layer); + update_deconvolutional_layer_gpu(layer, net.learning_rate, net.momentum, net.decay); } else if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *)net.layers[i]; - update_connected_layer_gpu(layer); + update_connected_layer_gpu(layer, net.learning_rate, net.momentum, net.decay); } } } @@ -151,35 +121,28 @@ void update_network_gpu(network net) float * get_network_output_gpu_layer(network net, int i) { if(net.types[i] == CONVOLUTIONAL){ - convolutional_layer layer = *(convolutional_layer *)net.layers[i]; - return layer.output_gpu; + return ((convolutional_layer *)net.layers[i]) -> output_gpu; } else if(net.types[i] == DECONVOLUTIONAL){ - deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; - return layer.output_gpu; + return ((deconvolutional_layer *)net.layers[i]) -> output_gpu; } else if(net.types[i] == DETECTION){ - detection_layer layer = *(detection_layer *)net.layers[i]; - return layer.output_gpu; + return ((detection_layer *)net.layers[i]) -> output_gpu; } else if(net.types[i] == CONNECTED){ - connected_layer layer = *(connected_layer *)net.layers[i]; - return layer.output_gpu; + return ((connected_layer *)net.layers[i]) -> output_gpu; } else if(net.types[i] == MAXPOOL){ - maxpool_layer layer = *(maxpool_layer *)net.layers[i]; - return layer.output_gpu; + return ((maxpool_layer *)net.layers[i]) -> output_gpu; } else if(net.types[i] == CROP){ - crop_layer layer = *(crop_layer *)net.layers[i]; - return layer.output_gpu; + return ((crop_layer *)net.layers[i]) -> output_gpu; } else if(net.types[i] == SOFTMAX){ - softmax_layer layer = *(softmax_layer *)net.layers[i]; - return layer.output_gpu; - } else if(net.types[i] == DROPOUT){ - dropout_layer layer = *(dropout_layer *)net.layers[i]; - return layer.output_gpu; + return ((softmax_layer *)net.layers[i]) -> output_gpu; + } + else if(net.types[i] == DROPOUT){ + return get_network_output_gpu_layer(net, i-1); } return 0; } @@ -219,6 +182,7 @@ float * get_network_delta_gpu_layer(network net, int i) float train_network_datum_gpu(network net, float *x, float *y) { //clock_t time = clock(); + network_state state; int x_size = get_network_input_size(net)*net.batch; int y_size = get_network_output_size(net)*net.batch; if(!*net.input_gpu){ @@ -228,12 +192,15 @@ float train_network_datum_gpu(network net, float *x, float *y) cuda_push_array(*net.input_gpu, x, x_size); cuda_push_array(*net.truth_gpu, y, y_size); } + state.input = *net.input_gpu; + state.truth = *net.truth_gpu; + state.train = 1; //printf("trans %f\n", sec(clock() - time)); //time = clock(); - forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1); + forward_network_gpu(net, state); //printf("forw %f\n", sec(clock() - time)); //time = clock(); - backward_network_gpu(net, *net.input_gpu, *net.truth_gpu); + backward_network_gpu(net, state); //printf("back %f\n", sec(clock() - time)); //time = clock(); update_network_gpu(net); @@ -291,10 +258,14 @@ float *network_predict_gpu(network net, float *input) { int size = get_network_input_size(net) * net.batch; - float * input_gpu = cuda_make_array(input, size); - forward_network_gpu(net, input_gpu, 0, 0); + network_state state; + state.input = cuda_make_array(input, size); + state.truth = 0; + state.train = 0; + state.delta = 0; + forward_network_gpu(net, state); float *out = get_network_output_gpu(net); - cuda_free(input_gpu); + cuda_free(state.input); return out; } diff --git a/src/normalization_layer.c b/src/normalization_layer.c index d82451bc..3ab318b6 100644 --- a/src/normalization_layer.c +++ b/src/normalization_layer.c @@ -59,28 +59,29 @@ void sub_square_array(float *src, float *dest, int n) } } -void forward_normalization_layer(const normalization_layer layer, float *in) +void forward_normalization_layer(const normalization_layer layer, network_state state) { int i,j,k; memset(layer.sums, 0, layer.h*layer.w*sizeof(float)); int imsize = layer.h*layer.w; for(j = 0; j < layer.size/2; ++j){ - if(j < layer.c) add_square_array(in+j*imsize, layer.sums, imsize); + if(j < layer.c) add_square_array(state.input+j*imsize, layer.sums, imsize); } for(k = 0; k < layer.c; ++k){ int next = k+layer.size/2; int prev = k-layer.size/2-1; - if(next < layer.c) add_square_array(in+next*imsize, layer.sums, imsize); - if(prev > 0) sub_square_array(in+prev*imsize, layer.sums, imsize); + if(next < layer.c) add_square_array(state.input+next*imsize, layer.sums, imsize); + if(prev > 0) sub_square_array(state.input+prev*imsize, layer.sums, imsize); for(i = 0; i < imsize; ++i){ - layer.output[k*imsize + i] = in[k*imsize+i] / pow(layer.kappa + layer.alpha * layer.sums[i], layer.beta); + layer.output[k*imsize + i] = state.input[k*imsize+i] / pow(layer.kappa + layer.alpha * layer.sums[i], layer.beta); } } } -void backward_normalization_layer(const normalization_layer layer, float *in, float *delta) +void backward_normalization_layer(const normalization_layer layer, network_state state) { - //TODO! + // TODO! + // OR NOT TODO!! } void visualize_normalization_layer(normalization_layer layer, char *window) diff --git a/src/normalization_layer.h b/src/normalization_layer.h index 914fe7d0..11f2827d 100644 --- a/src/normalization_layer.h +++ b/src/normalization_layer.h @@ -2,6 +2,7 @@ #define NORMALIZATION_LAYER_H #include "image.h" +#include "params.h" typedef struct { int batch; @@ -18,8 +19,8 @@ typedef struct { image get_normalization_image(normalization_layer layer); normalization_layer *make_normalization_layer(int batch, int h, int w, int c, int size, float alpha, float beta, float kappa); void resize_normalization_layer(normalization_layer *layer, int h, int w); -void forward_normalization_layer(const normalization_layer layer, float *in); -void backward_normalization_layer(const normalization_layer layer, float *in, float *delta); +void forward_normalization_layer(const normalization_layer layer, network_state state); +void backward_normalization_layer(const normalization_layer layer, network_state state); void visualize_normalization_layer(normalization_layer layer, char *window); #endif diff --git a/src/params.h b/src/params.h new file mode 100644 index 00000000..7343a079 --- /dev/null +++ b/src/params.h @@ -0,0 +1,12 @@ +#ifndef PARAMS_H +#define PARAMS_H + +typedef struct { + float *truth; + float *input; + float *delta; + int train; +} network_state; + +#endif + diff --git a/src/parser.c b/src/parser.c index 7b1057ee..d7c4a310 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,7 +14,6 @@ #include "softmax_layer.h" #include "dropout_layer.h" #include "detection_layer.h" -#include "freeweight_layer.h" #include "list.h" #include "option_list.h" #include "utils.h" @@ -24,12 +23,12 @@ typedef struct{ list *options; }section; +int is_network(section *s); int is_convolutional(section *s); int is_deconvolutional(section *s); int is_connected(section *s); int is_maxpool(section *s); int is_dropout(section *s); -int is_freeweight(section *s); int is_softmax(section *s); int is_crop(section *s); int is_cost(section *s); @@ -69,38 +68,31 @@ void parse_data(char *data, float *a, int n) } } -deconvolutional_layer *parse_deconvolutional(list *options, network *net, int count) +typedef struct size_params{ + int batch; + int inputs; + int h; + int w; + int c; +} size_params; + +deconvolutional_layer *parse_deconvolutional(list *options, size_params params) { - int h,w,c; - float learning_rate, momentum, decay; int n = option_find_int(options, "filters",1); int size = option_find_int(options, "size",1); int stride = option_find_int(options, "stride",1); char *activation_s = option_find_str(options, "activation", "logistic"); ACTIVATION activation = get_activation(activation_s); - if(count == 0){ - learning_rate = option_find_float(options, "learning_rate", .001); - momentum = option_find_float(options, "momentum", .9); - decay = option_find_float(options, "decay", .0001); - h = option_find_int(options, "height",1); - w = option_find_int(options, "width",1); - c = option_find_int(options, "channels",1); - net->batch = option_find_int(options, "batch",1); - net->learning_rate = learning_rate; - net->momentum = momentum; - net->decay = decay; - net->seen = option_find_int(options, "seen",0); - }else{ - learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate); - momentum = option_find_float_quiet(options, "momentum", net->momentum); - decay = option_find_float_quiet(options, "decay", net->decay); - image m = get_network_image_layer(*net, count-1); - h = m.h; - w = m.w; - c = m.c; - if(h == 0) error("Layer before deconvolutional layer must output image."); - } - deconvolutional_layer *layer = make_deconvolutional_layer(net->batch,h,w,c,n,size,stride,activation,learning_rate,momentum,decay); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before deconvolutional layer must output image."); + + deconvolutional_layer *layer = make_deconvolutional_layer(batch,h,w,c,n,size,stride,activation); + char *weights = option_find_str(options, "weights", 0); char *biases = option_find_str(options, "biases", 0); parse_data(weights, layer->filters, c*n*size*size); @@ -112,39 +104,24 @@ deconvolutional_layer *parse_deconvolutional(list *options, network *net, int co return layer; } -convolutional_layer *parse_convolutional(list *options, network *net, int count) +convolutional_layer *parse_convolutional(list *options, size_params params) { - int h,w,c; - float learning_rate, momentum, decay; int n = option_find_int(options, "filters",1); int size = option_find_int(options, "size",1); int stride = option_find_int(options, "stride",1); int pad = option_find_int(options, "pad",0); char *activation_s = option_find_str(options, "activation", "logistic"); ACTIVATION activation = get_activation(activation_s); - if(count == 0){ - learning_rate = option_find_float(options, "learning_rate", .001); - momentum = option_find_float(options, "momentum", .9); - decay = option_find_float(options, "decay", .0001); - h = option_find_int(options, "height",1); - w = option_find_int(options, "width",1); - c = option_find_int(options, "channels",1); - net->batch = option_find_int(options, "batch",1); - net->learning_rate = learning_rate; - net->momentum = momentum; - net->decay = decay; - net->seen = option_find_int(options, "seen",0); - }else{ - learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate); - momentum = option_find_float_quiet(options, "momentum", net->momentum); - decay = option_find_float_quiet(options, "decay", net->decay); - image m = get_network_image_layer(*net, count-1); - h = m.h; - w = m.w; - c = m.c; - if(h == 0) error("Layer before convolutional layer must output image."); - } - convolutional_layer *layer = make_convolutional_layer(net->batch,h,w,c,n,size,stride,pad,activation,learning_rate,momentum,decay); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before convolutional layer must output image."); + + convolutional_layer *layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation); + char *weights = option_find_str(options, "weights", 0); char *biases = option_find_str(options, "biases", 0); parse_data(weights, layer->filters, c*n*size*size); @@ -156,33 +133,18 @@ convolutional_layer *parse_convolutional(list *options, network *net, int count) return layer; } -connected_layer *parse_connected(list *options, network *net, int count) +connected_layer *parse_connected(list *options, size_params params) { - int input; - float learning_rate, momentum, decay; int output = option_find_int(options, "output",1); char *activation_s = option_find_str(options, "activation", "logistic"); ACTIVATION activation = get_activation(activation_s); - if(count == 0){ - input = option_find_int(options, "input",1); - net->batch = option_find_int(options, "batch",1); - learning_rate = option_find_float(options, "learning_rate", .001); - momentum = option_find_float(options, "momentum", .9); - decay = option_find_float(options, "decay", .0001); - net->learning_rate = learning_rate; - net->momentum = momentum; - net->decay = decay; - }else{ - learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate); - momentum = option_find_float_quiet(options, "momentum", net->momentum); - decay = option_find_float_quiet(options, "decay", net->decay); - input = get_network_output_size_layer(*net, count-1); - } - connected_layer *layer = make_connected_layer(net->batch, input, output, activation,learning_rate,momentum,decay); + + connected_layer *layer = make_connected_layer(params.batch, params.inputs, output, activation); + char *weights = option_find_str(options, "weights", 0); char *biases = option_find_str(options, "biases", 0); parse_data(biases, layer->biases, output); - parse_data(weights, layer->weights, input*output); + parse_data(weights, layer->weights, params.inputs*output); #ifdef GPU if(weights || biases) push_connected_layer(*layer); #endif @@ -190,235 +152,188 @@ connected_layer *parse_connected(list *options, network *net, int count) return layer; } -softmax_layer *parse_softmax(list *options, network *net, int count) +softmax_layer *parse_softmax(list *options, size_params params) { - int input; int groups = option_find_int(options, "groups",1); - if(count == 0){ - input = option_find_int(options, "input",1); - net->batch = option_find_int(options, "batch",1); - net->seen = option_find_int(options, "seen",0); - }else{ - input = get_network_output_size_layer(*net, count-1); - } - softmax_layer *layer = make_softmax_layer(net->batch, groups, input); + softmax_layer *layer = make_softmax_layer(params.batch, params.inputs, groups); option_unused(options); return layer; } -detection_layer *parse_detection(list *options, network *net, int count) +detection_layer *parse_detection(list *options, size_params params) { - int input; - if(count == 0){ - input = option_find_int(options, "input",1); - net->batch = option_find_int(options, "batch",1); - net->seen = option_find_int(options, "seen",0); - }else{ - input = get_network_output_size_layer(*net, count-1); - } int coords = option_find_int(options, "coords", 1); int classes = option_find_int(options, "classes", 1); int rescore = option_find_int(options, "rescore", 1); - detection_layer *layer = make_detection_layer(net->batch, input, classes, coords, rescore); + detection_layer *layer = make_detection_layer(params.batch, params.inputs, classes, coords, rescore); option_unused(options); return layer; } -cost_layer *parse_cost(list *options, network *net, int count) +cost_layer *parse_cost(list *options, size_params params) { - int input; - if(count == 0){ - input = option_find_int(options, "input",1); - net->batch = option_find_int(options, "batch",1); - net->seen = option_find_int(options, "seen",0); - }else{ - input = get_network_output_size_layer(*net, count-1); - } char *type_s = option_find_str(options, "type", "sse"); COST_TYPE type = get_cost_type(type_s); - cost_layer *layer = make_cost_layer(net->batch, input, type); + cost_layer *layer = make_cost_layer(params.batch, params.inputs, type); option_unused(options); return layer; } -crop_layer *parse_crop(list *options, network *net, int count) +crop_layer *parse_crop(list *options, size_params params) { - float learning_rate, momentum, decay; - int h,w,c; int crop_height = option_find_int(options, "crop_height",1); int crop_width = option_find_int(options, "crop_width",1); int flip = option_find_int(options, "flip",0); - if(count == 0){ - h = option_find_int(options, "height",1); - w = option_find_int(options, "width",1); - c = option_find_int(options, "channels",1); - net->batch = option_find_int(options, "batch",1); - learning_rate = option_find_float(options, "learning_rate", .001); - momentum = option_find_float(options, "momentum", .9); - decay = option_find_float(options, "decay", .0001); - net->learning_rate = learning_rate; - net->momentum = momentum; - net->decay = decay; - net->seen = option_find_int(options, "seen",0); - }else{ - image m = get_network_image_layer(*net, count-1); - h = m.h; - w = m.w; - c = m.c; - if(h == 0) error("Layer before crop layer must output image."); - } - crop_layer *layer = make_crop_layer(net->batch,h,w,c,crop_height,crop_width,flip); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before crop layer must output image."); + + crop_layer *layer = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip); option_unused(options); return layer; } -maxpool_layer *parse_maxpool(list *options, network *net, int count) +maxpool_layer *parse_maxpool(list *options, size_params params) { - int h,w,c; int stride = option_find_int(options, "stride",1); int size = option_find_int(options, "size",stride); - if(count == 0){ - h = option_find_int(options, "height",1); - w = option_find_int(options, "width",1); - c = option_find_int(options, "channels",1); - net->batch = option_find_int(options, "batch",1); - net->seen = option_find_int(options, "seen",0); - }else{ - image m = get_network_image_layer(*net, count-1); - h = m.h; - w = m.w; - c = m.c; - if(h == 0) error("Layer before convolutional layer must output image."); - } - maxpool_layer *layer = make_maxpool_layer(net->batch,h,w,c,size,stride); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before maxpool layer must output image."); + + maxpool_layer *layer = make_maxpool_layer(batch,h,w,c,size,stride); option_unused(options); return layer; } -/* -freeweight_layer *parse_freeweight(list *options, network *net, int count) +dropout_layer *parse_dropout(list *options, size_params params) { - int input; - if(count == 0){ - net->batch = option_find_int(options, "batch",1); - input = option_find_int(options, "input",1); - }else{ - input = get_network_output_size_layer(*net, count-1); - } - freeweight_layer *layer = make_freeweight_layer(net->batch,input); - option_unused(options); - return layer; -} -*/ - -dropout_layer *parse_dropout(list *options, network *net, int count) -{ - int input; float probability = option_find_float(options, "probability", .5); - if(count == 0){ - net->batch = option_find_int(options, "batch",1); - input = option_find_int(options, "input",1); - float learning_rate = option_find_float(options, "learning_rate", .001); - float momentum = option_find_float(options, "momentum", .9); - float decay = option_find_float(options, "decay", .0001); - net->learning_rate = learning_rate; - net->momentum = momentum; - net->decay = decay; - net->seen = option_find_int(options, "seen",0); - }else{ - input = get_network_output_size_layer(*net, count-1); - } - dropout_layer *layer = make_dropout_layer(net->batch,input,probability); + dropout_layer *layer = make_dropout_layer(params.batch, params.inputs, probability); option_unused(options); return layer; } -normalization_layer *parse_normalization(list *options, network *net, int count) +normalization_layer *parse_normalization(list *options, size_params params) { - int h,w,c; int size = option_find_int(options, "size",1); float alpha = option_find_float(options, "alpha", 0.); float beta = option_find_float(options, "beta", 1.); float kappa = option_find_float(options, "kappa", 1.); - if(count == 0){ - h = option_find_int(options, "height",1); - w = option_find_int(options, "width",1); - c = option_find_int(options, "channels",1); - net->batch = option_find_int(options, "batch",1); - net->seen = option_find_int(options, "seen",0); - }else{ - image m = get_network_image_layer(*net, count-1); - h = m.h; - w = m.w; - c = m.c; - if(h == 0) error("Layer before convolutional layer must output image."); - } - normalization_layer *layer = make_normalization_layer(net->batch,h,w,c,size, alpha, beta, kappa); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before normalization layer must output image."); + + normalization_layer *layer = make_normalization_layer(batch,h,w,c,size, alpha, beta, kappa); option_unused(options); return layer; } +void parse_net_options(list *options, network *net) +{ + net->batch = option_find_int(options, "batch",1); + net->learning_rate = option_find_float(options, "learning_rate", .001); + net->momentum = option_find_float(options, "momentum", .9); + net->decay = option_find_float(options, "decay", .0001); + net->seen = option_find_int(options, "seen",0); + + net->h = option_find_int_quiet(options, "height",0); + net->w = option_find_int_quiet(options, "width",0); + net->c = option_find_int_quiet(options, "channels",0); + net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); +} + network parse_network_cfg(char *filename) { list *sections = read_cfg(filename); - network net = make_network(sections->size, 0); - node *n = sections->front; + if(!n) error("Config file has no sections"); + network net = make_network(sections->size - 1); + size_params params; + + section *s = (section *)n->val; + list *options = s->options; + if(!is_network(s)) error("First section must be [net] or [network]"); + parse_net_options(options, &net); + + params.h = net.h; + params.w = net.w; + params.c = net.c; + params.inputs = net.inputs; + params.batch = net.batch; + + n = n->next; int count = 0; while(n){ - section *s = (section *)n->val; - list *options = s->options; + fprintf(stderr, "%d: ", count); + s = (section *)n->val; + options = s->options; if(is_convolutional(s)){ - convolutional_layer *layer = parse_convolutional(options, &net, count); + convolutional_layer *layer = parse_convolutional(options, params); net.types[count] = CONVOLUTIONAL; net.layers[count] = layer; }else if(is_deconvolutional(s)){ - deconvolutional_layer *layer = parse_deconvolutional(options, &net, count); + deconvolutional_layer *layer = parse_deconvolutional(options, params); net.types[count] = DECONVOLUTIONAL; net.layers[count] = layer; }else if(is_connected(s)){ - connected_layer *layer = parse_connected(options, &net, count); + connected_layer *layer = parse_connected(options, params); net.types[count] = CONNECTED; net.layers[count] = layer; }else if(is_crop(s)){ - crop_layer *layer = parse_crop(options, &net, count); + crop_layer *layer = parse_crop(options, params); net.types[count] = CROP; net.layers[count] = layer; }else if(is_cost(s)){ - cost_layer *layer = parse_cost(options, &net, count); + cost_layer *layer = parse_cost(options, params); net.types[count] = COST; net.layers[count] = layer; }else if(is_detection(s)){ - detection_layer *layer = parse_detection(options, &net, count); + detection_layer *layer = parse_detection(options, params); net.types[count] = DETECTION; net.layers[count] = layer; }else if(is_softmax(s)){ - softmax_layer *layer = parse_softmax(options, &net, count); + softmax_layer *layer = parse_softmax(options, params); net.types[count] = SOFTMAX; net.layers[count] = layer; }else if(is_maxpool(s)){ - maxpool_layer *layer = parse_maxpool(options, &net, count); + maxpool_layer *layer = parse_maxpool(options, params); net.types[count] = MAXPOOL; net.layers[count] = layer; }else if(is_normalization(s)){ - normalization_layer *layer = parse_normalization(options, &net, count); + normalization_layer *layer = parse_normalization(options, params); net.types[count] = NORMALIZATION; net.layers[count] = layer; }else if(is_dropout(s)){ - dropout_layer *layer = parse_dropout(options, &net, count); + dropout_layer *layer = parse_dropout(options, params); net.types[count] = DROPOUT; net.layers[count] = layer; - }else if(is_freeweight(s)){ - //freeweight_layer *layer = parse_freeweight(options, &net, count); - //net.types[count] = FREEWEIGHT; - //net.layers[count] = layer; - fprintf(stderr, "Type not recognized: %s\n", s->type); }else{ fprintf(stderr, "Type not recognized: %s\n", s->type); } free_section(s); - ++count; n = n->next; + if(n){ + image im = get_network_image_layer(net, count); + params.h = im.h; + params.w = im.w; + params.c = im.c; + params.inputs = get_network_output_size_layer(net, count); + } + ++count; } free_list(sections); net.outputs = get_network_output_size(net); @@ -448,6 +363,11 @@ int is_convolutional(section *s) return (strcmp(s->type, "[conv]")==0 || strcmp(s->type, "[convolutional]")==0); } +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} int is_connected(section *s) { return (strcmp(s->type, "[conn]")==0 @@ -462,10 +382,6 @@ int is_dropout(section *s) { return (strcmp(s->type, "[dropout]")==0); } -int is_freeweight(section *s) -{ - return (strcmp(s->type, "[freeweight]")==0); -} int is_softmax(section *s) { @@ -533,29 +449,11 @@ list *read_cfg(char *filename) void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int count) { - #ifdef GPU +#ifdef GPU if(gpu_index >= 0) pull_convolutional_layer(*l); - #endif +#endif int i; fprintf(fp, "[convolutional]\n"); - if(count == 0) { - fprintf(fp, "batch=%d\n" - "height=%d\n" - "width=%d\n" - "channels=%d\n" - "learning_rate=%g\n" - "momentum=%g\n" - "decay=%g\n" - "seen=%d\n", - l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay, net.seen); - } else { - if(l->learning_rate != net.learning_rate) - fprintf(fp, "learning_rate=%g\n", l->learning_rate); - if(l->momentum != net.momentum) - fprintf(fp, "momentum=%g\n", l->momentum); - if(l->decay != net.decay) - fprintf(fp, "decay=%g\n", l->decay); - } fprintf(fp, "filters=%d\n" "size=%d\n" "stride=%d\n" @@ -573,29 +471,11 @@ void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int void print_deconvolutional_cfg(FILE *fp, deconvolutional_layer *l, network net, int count) { - #ifdef GPU +#ifdef GPU if(gpu_index >= 0) pull_deconvolutional_layer(*l); - #endif +#endif int i; fprintf(fp, "[deconvolutional]\n"); - if(count == 0) { - fprintf(fp, "batch=%d\n" - "height=%d\n" - "width=%d\n" - "channels=%d\n" - "learning_rate=%g\n" - "momentum=%g\n" - "decay=%g\n" - "seen=%d\n", - l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay, net.seen); - } else { - if(l->learning_rate != net.learning_rate) - fprintf(fp, "learning_rate=%g\n", l->learning_rate); - if(l->momentum != net.momentum) - fprintf(fp, "momentum=%g\n", l->momentum); - if(l->decay != net.decay) - fprintf(fp, "decay=%g\n", l->decay); - } fprintf(fp, "filters=%d\n" "size=%d\n" "stride=%d\n" @@ -610,47 +490,19 @@ void print_deconvolutional_cfg(FILE *fp, deconvolutional_layer *l, network net, fprintf(fp, "\n\n"); } -void print_freeweight_cfg(FILE *fp, freeweight_layer *l, network net, int count) -{ - fprintf(fp, "[freeweight]\n"); - if(count == 0){ - fprintf(fp, "batch=%d\ninput=%d\n",l->batch, l->inputs); - } - fprintf(fp, "\n"); -} - void print_dropout_cfg(FILE *fp, dropout_layer *l, network net, int count) { fprintf(fp, "[dropout]\n"); - if(count == 0){ - fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); - } fprintf(fp, "probability=%g\n\n", l->probability); } void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count) { - #ifdef GPU +#ifdef GPU if(gpu_index >= 0) pull_connected_layer(*l); - #endif +#endif int i; fprintf(fp, "[connected]\n"); - if(count == 0){ - fprintf(fp, "batch=%d\n" - "input=%d\n" - "learning_rate=%g\n" - "momentum=%g\n" - "decay=%g\n" - "seen=%d\n", - l->batch, l->inputs, l->learning_rate, l->momentum, l->decay, net.seen); - } else { - if(l->learning_rate != net.learning_rate) - fprintf(fp, "learning_rate=%g\n", l->learning_rate); - if(l->momentum != net.momentum) - fprintf(fp, "momentum=%g\n", l->momentum); - if(l->decay != net.decay) - fprintf(fp, "decay=%g\n", l->decay); - } fprintf(fp, "output=%d\n" "activation=%s\n", l->outputs, @@ -666,39 +518,18 @@ void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count) void print_crop_cfg(FILE *fp, crop_layer *l, network net, int count) { fprintf(fp, "[crop]\n"); - if(count == 0) { - fprintf(fp, "batch=%d\n" - "height=%d\n" - "width=%d\n" - "channels=%d\n" - "learning_rate=%g\n" - "momentum=%g\n" - "decay=%g\n" - "seen=%d\n", - l->batch,l->h, l->w, l->c, net.learning_rate, net.momentum, net.decay, net.seen); - } fprintf(fp, "crop_height=%d\ncrop_width=%d\nflip=%d\n\n", l->crop_height, l->crop_width, l->flip); } void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count) { fprintf(fp, "[maxpool]\n"); - if(count == 0) fprintf(fp, "batch=%d\n" - "height=%d\n" - "width=%d\n" - "channels=%d\n", - l->batch,l->h, l->w, l->c); fprintf(fp, "size=%d\nstride=%d\n\n", l->size, l->stride); } void print_normalization_cfg(FILE *fp, normalization_layer *l, network net, int count) { fprintf(fp, "[localresponsenormalization]\n"); - if(count == 0) fprintf(fp, "batch=%d\n" - "height=%d\n" - "width=%d\n" - "channels=%d\n", - l->batch,l->h, l->w, l->c); fprintf(fp, "size=%d\n" "alpha=%g\n" "beta=%g\n" @@ -708,7 +539,6 @@ void print_normalization_cfg(FILE *fp, normalization_layer *l, network net, int void print_softmax_cfg(FILE *fp, softmax_layer *l, network net, int count) { fprintf(fp, "[softmax]\n"); - if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); fprintf(fp, "\n"); } @@ -722,7 +552,6 @@ void print_detection_cfg(FILE *fp, detection_layer *l, network net, int count) void print_cost_cfg(FILE *fp, cost_layer *l, network net, int count) { fprintf(fp, "[cost]\ntype=%s\n", get_cost_string(l->type)); - if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); fprintf(fp, "\n"); } @@ -741,33 +570,33 @@ void save_weights(network net, char *filename) for(i = 0; i < net.n; ++i){ if(net.types[i] == CONVOLUTIONAL){ convolutional_layer layer = *(convolutional_layer *) net.layers[i]; - #ifdef GPU +#ifdef GPU if(gpu_index >= 0){ pull_convolutional_layer(layer); } - #endif +#endif int num = layer.n*layer.c*layer.size*layer.size; fwrite(layer.biases, sizeof(float), layer.n, fp); fwrite(layer.filters, sizeof(float), num, fp); } if(net.types[i] == DECONVOLUTIONAL){ deconvolutional_layer layer = *(deconvolutional_layer *) net.layers[i]; - #ifdef GPU +#ifdef GPU if(gpu_index >= 0){ pull_deconvolutional_layer(layer); } - #endif +#endif int num = layer.n*layer.c*layer.size*layer.size; fwrite(layer.biases, sizeof(float), layer.n, fp); fwrite(layer.filters, sizeof(float), num, fp); } if(net.types[i] == CONNECTED){ connected_layer layer = *(connected_layer *) net.layers[i]; - #ifdef GPU +#ifdef GPU if(gpu_index >= 0){ pull_connected_layer(layer); } - #endif +#endif fwrite(layer.biases, sizeof(float), layer.outputs, fp); fwrite(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp); } @@ -785,8 +614,7 @@ void load_weights_upto(network *net, char *filename, int cutoff) fread(&net->momentum, sizeof(float), 1, fp); fread(&net->decay, sizeof(float), 1, fp); fread(&net->seen, sizeof(int), 1, fp); - set_learning_network(net, net->learning_rate, net->momentum, net->decay); - + int i; for(i = 0; i < net->n && i < cutoff; ++i){ if(net->types[i] == CONVOLUTIONAL){ @@ -794,32 +622,32 @@ void load_weights_upto(network *net, char *filename, int cutoff) int num = layer.n*layer.c*layer.size*layer.size; fread(layer.biases, sizeof(float), layer.n, fp); fread(layer.filters, sizeof(float), num, fp); - #ifdef GPU +#ifdef GPU if(gpu_index >= 0){ push_convolutional_layer(layer); } - #endif +#endif } if(net->types[i] == DECONVOLUTIONAL){ deconvolutional_layer layer = *(deconvolutional_layer *) net->layers[i]; int num = layer.n*layer.c*layer.size*layer.size; fread(layer.biases, sizeof(float), layer.n, fp); fread(layer.filters, sizeof(float), num, fp); - #ifdef GPU +#ifdef GPU if(gpu_index >= 0){ push_deconvolutional_layer(layer); } - #endif +#endif } if(net->types[i] == CONNECTED){ connected_layer layer = *(connected_layer *) net->layers[i]; fread(layer.biases, sizeof(float), layer.outputs, fp); fread(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp); - #ifdef GPU +#ifdef GPU if(gpu_index >= 0){ push_connected_layer(layer); } - #endif +#endif } } fclose(fp); @@ -847,8 +675,6 @@ void save_network(network net, char *filename) print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i); else if(net.types[i] == MAXPOOL) print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i); - else if(net.types[i] == FREEWEIGHT) - print_freeweight_cfg(fp, (freeweight_layer *)net.layers[i], net, i); else if(net.types[i] == DROPOUT) print_dropout_cfg(fp, (dropout_layer *)net.layers[i], net, i); else if(net.types[i] == NORMALIZATION) diff --git a/src/softmax_layer.c b/src/softmax_layer.c index a200ae54..e344d166 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -7,7 +7,7 @@ #include #include -softmax_layer *make_softmax_layer(int batch, int groups, int inputs) +softmax_layer *make_softmax_layer(int batch, int inputs, int groups) { assert(inputs%groups == 0); fprintf(stderr, "Softmax Layer: %d inputs\n", inputs); @@ -42,21 +42,21 @@ void softmax_array(float *input, int n, float *output) } } -void forward_softmax_layer(const softmax_layer layer, float *input) +void forward_softmax_layer(const softmax_layer layer, network_state state) { int b; int inputs = layer.inputs / layer.groups; int batch = layer.batch * layer.groups; for(b = 0; b < batch; ++b){ - softmax_array(input+b*inputs, inputs, layer.output+b*inputs); + softmax_array(state.input+b*inputs, inputs, layer.output+b*inputs); } } -void backward_softmax_layer(const softmax_layer layer, float *delta) +void backward_softmax_layer(const softmax_layer layer, network_state state) { int i; for(i = 0; i < layer.inputs*layer.batch; ++i){ - delta[i] = layer.delta[i]; + state.delta[i] = layer.delta[i]; } } diff --git a/src/softmax_layer.h b/src/softmax_layer.h index 3632c747..ecdec1ed 100644 --- a/src/softmax_layer.h +++ b/src/softmax_layer.h @@ -1,5 +1,6 @@ #ifndef SOFTMAX_LAYER_H #define SOFTMAX_LAYER_H +#include "params.h" typedef struct { int inputs; @@ -14,14 +15,14 @@ typedef struct { } softmax_layer; void softmax_array(float *input, int n, float *output); -softmax_layer *make_softmax_layer(int batch, int groups, int inputs); -void forward_softmax_layer(const softmax_layer layer, float *input); -void backward_softmax_layer(const softmax_layer layer, float *delta); +softmax_layer *make_softmax_layer(int batch, int inputs, int groups); +void forward_softmax_layer(const softmax_layer layer, network_state state); +void backward_softmax_layer(const softmax_layer layer, network_state state); #ifdef GPU void pull_softmax_layer_output(const softmax_layer layer); -void forward_softmax_layer_gpu(const softmax_layer layer, float *input); -void backward_softmax_layer_gpu(const softmax_layer layer, float *delta); +void forward_softmax_layer_gpu(const softmax_layer layer, network_state state); +void backward_softmax_layer_gpu(const softmax_layer layer, network_state state); #endif #endif diff --git a/src/softmax_layer_kernels.cu b/src/softmax_layer_kernels.cu index c0e8bc38..0529f755 100644 --- a/src/softmax_layer_kernels.cu +++ b/src/softmax_layer_kernels.cu @@ -32,23 +32,17 @@ extern "C" void pull_softmax_layer_output(const softmax_layer layer) cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); } -extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, float *input) +extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, network_state state) { int inputs = layer.inputs / layer.groups; int batch = layer.batch * layer.groups; - forward_softmax_layer_kernel<<>>(inputs, batch, input, layer.output_gpu); + forward_softmax_layer_kernel<<>>(inputs, batch, state.input, layer.output_gpu); check_error(cudaPeekAtLastError()); - - /* - cl_read_array(layer.output_cl, layer.output, layer.inputs*layer.batch); - int z; - for(z = 0; z < layer.inputs*layer.batch; ++z) printf("%f,",layer.output[z]); - */ } -extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, float *delta) +extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, network_state state) { - copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, delta, 1); + copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, state.delta, 1); } /* This is if you want softmax w/o log-loss classification. You probably don't.