mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
refactoring and added DARK ZONE
This commit is contained in:
parent
f047cfff99
commit
dcb000b553
@ -16,7 +16,7 @@ void train_captcha(char *cfgfile, char *weightfile)
|
|||||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||||
int imgs = 1024;
|
int imgs = 1024;
|
||||||
int i = net.seen/imgs;
|
int i = net.seen/imgs;
|
||||||
list *plist = get_paths("/data/captcha/train.list");
|
list *plist = get_paths("/data/captcha/train.base");
|
||||||
char **paths = (char **)list_to_array(plist);
|
char **paths = (char **)list_to_array(plist);
|
||||||
printf("%d\n", plist->size);
|
printf("%d\n", plist->size);
|
||||||
clock_t time;
|
clock_t time;
|
||||||
|
@ -9,15 +9,11 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay)
|
connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
connected_layer *layer = calloc(1, sizeof(connected_layer));
|
connected_layer *layer = calloc(1, sizeof(connected_layer));
|
||||||
|
|
||||||
layer->learning_rate = learning_rate;
|
|
||||||
layer->momentum = momentum;
|
|
||||||
layer->decay = decay;
|
|
||||||
|
|
||||||
layer->inputs = inputs;
|
layer->inputs = inputs;
|
||||||
layer->outputs = outputs;
|
layer->outputs = outputs;
|
||||||
layer->batch=batch;
|
layer->batch=batch;
|
||||||
@ -59,41 +55,17 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVA
|
|||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void secret_update_connected_layer(connected_layer *layer)
|
void update_connected_layer(connected_layer layer, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
int n = layer->outputs*layer->inputs;
|
axpy_cpu(layer.outputs, learning_rate, layer.bias_updates, 1, layer.biases, 1);
|
||||||
float dot = dot_cpu(n, layer->weight_updates, 1, layer->weight_prev, 1);
|
scal_cpu(layer.outputs, momentum, layer.bias_updates, 1);
|
||||||
float mag = sqrt(dot_cpu(n, layer->weight_updates, 1, layer->weight_updates, 1))
|
|
||||||
* sqrt(dot_cpu(n, layer->weight_prev, 1, layer->weight_prev, 1));
|
|
||||||
float cos = dot/mag;
|
|
||||||
if(cos > .3) layer->learning_rate *= 1.1;
|
|
||||||
else if (cos < -.3) layer-> learning_rate /= 1.1;
|
|
||||||
|
|
||||||
scal_cpu(n, layer->momentum, layer->weight_prev, 1);
|
axpy_cpu(layer.inputs*layer.outputs, -decay, layer.weights, 1, layer.weight_updates, 1);
|
||||||
axpy_cpu(n, 1, layer->weight_updates, 1, layer->weight_prev, 1);
|
axpy_cpu(layer.inputs*layer.outputs, learning_rate, layer.weight_updates, 1, layer.weights, 1);
|
||||||
scal_cpu(n, 0, layer->weight_updates, 1);
|
scal_cpu(layer.inputs*layer.outputs, momentum, layer.weight_updates, 1);
|
||||||
|
|
||||||
scal_cpu(layer->outputs, layer->momentum, layer->bias_prev, 1);
|
|
||||||
axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1);
|
|
||||||
scal_cpu(layer->outputs, 0, layer->bias_updates, 1);
|
|
||||||
|
|
||||||
axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1);
|
|
||||||
|
|
||||||
axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1);
|
|
||||||
axpy_cpu(layer->inputs*layer->outputs, layer->learning_rate, layer->weight_prev, 1, layer->weights, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_connected_layer(connected_layer layer)
|
void forward_connected_layer(connected_layer layer, network_state state)
|
||||||
{
|
|
||||||
axpy_cpu(layer.outputs, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1);
|
|
||||||
scal_cpu(layer.outputs, layer.momentum, layer.bias_updates, 1);
|
|
||||||
|
|
||||||
axpy_cpu(layer.inputs*layer.outputs, -layer.decay, layer.weights, 1, layer.weight_updates, 1);
|
|
||||||
axpy_cpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates, 1, layer.weights, 1);
|
|
||||||
scal_cpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void forward_connected_layer(connected_layer layer, float *input)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
@ -102,14 +74,14 @@ void forward_connected_layer(connected_layer layer, float *input)
|
|||||||
int m = layer.batch;
|
int m = layer.batch;
|
||||||
int k = layer.inputs;
|
int k = layer.inputs;
|
||||||
int n = layer.outputs;
|
int n = layer.outputs;
|
||||||
float *a = input;
|
float *a = state.input;
|
||||||
float *b = layer.weights;
|
float *b = layer.weights;
|
||||||
float *c = layer.output;
|
float *c = layer.output;
|
||||||
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||||
activate_array(layer.output, layer.outputs*layer.batch, layer.activation);
|
activate_array(layer.output, layer.outputs*layer.batch, layer.activation);
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_connected_layer(connected_layer layer, float *input, float *delta)
|
void backward_connected_layer(connected_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
float alpha = 1./layer.batch;
|
float alpha = 1./layer.batch;
|
||||||
@ -120,7 +92,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
|
|||||||
int m = layer.inputs;
|
int m = layer.inputs;
|
||||||
int k = layer.batch;
|
int k = layer.batch;
|
||||||
int n = layer.outputs;
|
int n = layer.outputs;
|
||||||
float *a = input;
|
float *a = state.input;
|
||||||
float *b = layer.delta;
|
float *b = layer.delta;
|
||||||
float *c = layer.weight_updates;
|
float *c = layer.weight_updates;
|
||||||
gemm(1,0,m,n,k,alpha,a,m,b,n,1,c,n);
|
gemm(1,0,m,n,k,alpha,a,m,b,n,1,c,n);
|
||||||
@ -131,7 +103,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
|
|||||||
|
|
||||||
a = layer.delta;
|
a = layer.delta;
|
||||||
b = layer.weights;
|
b = layer.weights;
|
||||||
c = delta;
|
c = state.delta;
|
||||||
|
|
||||||
if(c) gemm(0,1,m,n,k,1,a,k,b,k,0,c,n);
|
if(c) gemm(0,1,m,n,k,1,a,k,b,k,0,c,n);
|
||||||
}
|
}
|
||||||
@ -154,23 +126,17 @@ void push_connected_layer(connected_layer layer)
|
|||||||
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.outputs);
|
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.outputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_connected_layer_gpu(connected_layer layer)
|
void update_connected_layer_gpu(connected_layer layer, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
/*
|
axpy_ongpu(layer.outputs, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
||||||
cuda_pull_array(layer.weights_gpu, layer.weights, layer.inputs*layer.outputs);
|
scal_ongpu(layer.outputs, momentum, layer.bias_updates_gpu, 1);
|
||||||
cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.inputs*layer.outputs);
|
|
||||||
printf("Weights: %f updates: %f\n", mag_array(layer.weights, layer.inputs*layer.outputs), layer.learning_rate*mag_array(layer.weight_updates, layer.inputs*layer.outputs));
|
|
||||||
*/
|
|
||||||
|
|
||||||
axpy_ongpu(layer.outputs, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
axpy_ongpu(layer.inputs*layer.outputs, -decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
|
||||||
scal_ongpu(layer.outputs, layer.momentum, layer.bias_updates_gpu, 1);
|
axpy_ongpu(layer.inputs*layer.outputs, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
|
||||||
|
scal_ongpu(layer.inputs*layer.outputs, momentum, layer.weight_updates_gpu, 1);
|
||||||
axpy_ongpu(layer.inputs*layer.outputs, -layer.decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
|
|
||||||
axpy_ongpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
|
|
||||||
scal_ongpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates_gpu, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_connected_layer_gpu(connected_layer layer, float * input)
|
void forward_connected_layer_gpu(connected_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
@ -179,14 +145,14 @@ void forward_connected_layer_gpu(connected_layer layer, float * input)
|
|||||||
int m = layer.batch;
|
int m = layer.batch;
|
||||||
int k = layer.inputs;
|
int k = layer.inputs;
|
||||||
int n = layer.outputs;
|
int n = layer.outputs;
|
||||||
float * a = input;
|
float * a = state.input;
|
||||||
float * b = layer.weights_gpu;
|
float * b = layer.weights_gpu;
|
||||||
float * c = layer.output_gpu;
|
float * c = layer.output_gpu;
|
||||||
gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||||
activate_array_ongpu(layer.output_gpu, layer.outputs*layer.batch, layer.activation);
|
activate_array_ongpu(layer.output_gpu, layer.outputs*layer.batch, layer.activation);
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_connected_layer_gpu(connected_layer layer, float * input, float * delta)
|
void backward_connected_layer_gpu(connected_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
float alpha = 1./layer.batch;
|
float alpha = 1./layer.batch;
|
||||||
int i;
|
int i;
|
||||||
@ -197,7 +163,7 @@ void backward_connected_layer_gpu(connected_layer layer, float * input, float *
|
|||||||
int m = layer.inputs;
|
int m = layer.inputs;
|
||||||
int k = layer.batch;
|
int k = layer.batch;
|
||||||
int n = layer.outputs;
|
int n = layer.outputs;
|
||||||
float * a = input;
|
float * a = state.input;
|
||||||
float * b = layer.delta_gpu;
|
float * b = layer.delta_gpu;
|
||||||
float * c = layer.weight_updates_gpu;
|
float * c = layer.weight_updates_gpu;
|
||||||
gemm_ongpu(1,0,m,n,k,alpha,a,m,b,n,1,c,n);
|
gemm_ongpu(1,0,m,n,k,alpha,a,m,b,n,1,c,n);
|
||||||
@ -208,7 +174,7 @@ void backward_connected_layer_gpu(connected_layer layer, float * input, float *
|
|||||||
|
|
||||||
a = layer.delta_gpu;
|
a = layer.delta_gpu;
|
||||||
b = layer.weights_gpu;
|
b = layer.weights_gpu;
|
||||||
c = delta;
|
c = state.delta;
|
||||||
|
|
||||||
if(c) gemm_ongpu(0,1,m,n,k,1,a,k,b,k,0,c,n);
|
if(c) gemm_ongpu(0,1,m,n,k,1,a,k,b,k,0,c,n);
|
||||||
}
|
}
|
||||||
|
@ -2,12 +2,9 @@
|
|||||||
#define CONNECTED_LAYER_H
|
#define CONNECTED_LAYER_H
|
||||||
|
|
||||||
#include "activations.h"
|
#include "activations.h"
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
typedef struct{
|
typedef struct{
|
||||||
float learning_rate;
|
|
||||||
float momentum;
|
|
||||||
float decay;
|
|
||||||
|
|
||||||
int batch;
|
int batch;
|
||||||
int inputs;
|
int inputs;
|
||||||
int outputs;
|
int outputs;
|
||||||
@ -37,17 +34,16 @@ typedef struct{
|
|||||||
|
|
||||||
} connected_layer;
|
} connected_layer;
|
||||||
|
|
||||||
void secret_update_connected_layer(connected_layer *layer);
|
connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation);
|
||||||
connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay);
|
|
||||||
|
|
||||||
void forward_connected_layer(connected_layer layer, float *input);
|
void forward_connected_layer(connected_layer layer, network_state state);
|
||||||
void backward_connected_layer(connected_layer layer, float *input, float *delta);
|
void backward_connected_layer(connected_layer layer, network_state state);
|
||||||
void update_connected_layer(connected_layer layer);
|
void update_connected_layer(connected_layer layer, float learning_rate, float momentum, float decay);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_connected_layer_gpu(connected_layer layer, float * input);
|
void forward_connected_layer_gpu(connected_layer layer, network_state state);
|
||||||
void backward_connected_layer_gpu(connected_layer layer, float * input, float * delta);
|
void backward_connected_layer_gpu(connected_layer layer, network_state state);
|
||||||
void update_connected_layer_gpu(connected_layer layer);
|
void update_connected_layer_gpu(connected_layer layer, float learning_rate, float momentum, float decay);
|
||||||
void push_connected_layer(connected_layer layer);
|
void push_connected_layer(connected_layer layer);
|
||||||
void pull_connected_layer(connected_layer layer);
|
void pull_connected_layer(connected_layer layer);
|
||||||
#endif
|
#endif
|
||||||
|
@ -54,7 +54,7 @@ extern "C" void backward_bias_gpu(float *bias_updates, float *delta, int batch,
|
|||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float *in)
|
extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int m = layer.n;
|
int m = layer.n;
|
||||||
@ -65,7 +65,7 @@ extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float
|
|||||||
bias_output_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, n);
|
bias_output_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, n);
|
||||||
|
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
im2col_ongpu(in + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu);
|
im2col_ongpu(state.input + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu);
|
||||||
float * a = layer.filters_gpu;
|
float * a = layer.filters_gpu;
|
||||||
float * b = layer.col_image_gpu;
|
float * b = layer.col_image_gpu;
|
||||||
float * c = layer.output_gpu;
|
float * c = layer.output_gpu;
|
||||||
@ -74,7 +74,7 @@ extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float
|
|||||||
activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation);
|
activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu)
|
extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
float alpha = 1./layer.batch;
|
float alpha = 1./layer.batch;
|
||||||
int i;
|
int i;
|
||||||
@ -86,17 +86,17 @@ extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, floa
|
|||||||
gradient_array_ongpu(layer.output_gpu, m*k*layer.batch, layer.activation, layer.delta_gpu);
|
gradient_array_ongpu(layer.output_gpu, m*k*layer.batch, layer.activation, layer.delta_gpu);
|
||||||
backward_bias_gpu(layer.bias_updates_gpu, layer.delta_gpu, layer.batch, layer.n, k);
|
backward_bias_gpu(layer.bias_updates_gpu, layer.delta_gpu, layer.batch, layer.n, k);
|
||||||
|
|
||||||
if(delta_gpu) scal_ongpu(layer.batch*layer.h*layer.w*layer.c, 0, delta_gpu, 1);
|
if(state.delta) scal_ongpu(layer.batch*layer.h*layer.w*layer.c, 0, state.delta, 1);
|
||||||
|
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
float * a = layer.delta_gpu;
|
float * a = layer.delta_gpu;
|
||||||
float * b = layer.col_image_gpu;
|
float * b = layer.col_image_gpu;
|
||||||
float * c = layer.filter_updates_gpu;
|
float * c = layer.filter_updates_gpu;
|
||||||
|
|
||||||
im2col_ongpu(in + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu);
|
im2col_ongpu(state.input + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu);
|
||||||
gemm_ongpu(0,1,m,n,k,alpha,a + i*m*k,k,b,k,1,c,n);
|
gemm_ongpu(0,1,m,n,k,alpha,a + i*m*k,k,b,k,1,c,n);
|
||||||
|
|
||||||
if(delta_gpu){
|
if(state.delta){
|
||||||
|
|
||||||
float * a = layer.filters_gpu;
|
float * a = layer.filters_gpu;
|
||||||
float * b = layer.delta_gpu;
|
float * b = layer.delta_gpu;
|
||||||
@ -104,7 +104,7 @@ extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, floa
|
|||||||
|
|
||||||
gemm_ongpu(1,0,n,k,m,1,a,n,b + i*k*m,k,0,c,k);
|
gemm_ongpu(1,0,n,k,m,1,a,n,b + i*k*m,k,0,c,k);
|
||||||
|
|
||||||
col2im_ongpu(layer.col_image_gpu, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta_gpu + i*layer.c*layer.h*layer.w);
|
col2im_ongpu(layer.col_image_gpu, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, state.delta + i*layer.c*layer.h*layer.w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -125,22 +125,15 @@ extern "C" void push_convolutional_layer(convolutional_layer layer)
|
|||||||
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void update_convolutional_layer_gpu(convolutional_layer layer)
|
extern "C" void update_convolutional_layer_gpu(convolutional_layer layer, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
int size = layer.size*layer.size*layer.c*layer.n;
|
int size = layer.size*layer.size*layer.c*layer.n;
|
||||||
|
|
||||||
/*
|
axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
||||||
cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size);
|
scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1);
|
||||||
cuda_pull_array(layer.filters_gpu, layer.filters, size);
|
|
||||||
printf("Filter: %f updates: %f\n", mag_array(layer.filters, size), layer.learning_rate*mag_array(layer.filter_updates, size));
|
|
||||||
*/
|
|
||||||
|
|
||||||
axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
axpy_ongpu(size, -decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1);
|
||||||
scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1);
|
axpy_ongpu(size, learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1);
|
||||||
|
scal_ongpu(size, momentum, layer.filter_updates_gpu, 1);
|
||||||
axpy_ongpu(size, -layer.decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1);
|
|
||||||
axpy_ongpu(size, layer.learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1);
|
|
||||||
scal_ongpu(size, layer.momentum, layer.filter_updates_gpu, 1);
|
|
||||||
//pull_convolutional_layer(layer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,15 +41,11 @@ image get_convolutional_delta(convolutional_layer layer)
|
|||||||
return float_to_image(h,w,c,layer.delta);
|
return float_to_image(h,w,c,layer.delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay)
|
convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
convolutional_layer *layer = calloc(1, sizeof(convolutional_layer));
|
convolutional_layer *layer = calloc(1, sizeof(convolutional_layer));
|
||||||
|
|
||||||
layer->learning_rate = learning_rate;
|
|
||||||
layer->momentum = momentum;
|
|
||||||
layer->decay = decay;
|
|
||||||
|
|
||||||
layer->h = h;
|
layer->h = h;
|
||||||
layer->w = w;
|
layer->w = w;
|
||||||
layer->c = c;
|
layer->c = c;
|
||||||
@ -143,7 +139,7 @@ void backward_bias(float *bias_updates, float *delta, int batch, int n, int size
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void forward_convolutional_layer(const convolutional_layer layer, float *in)
|
void forward_convolutional_layer(const convolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int out_h = convolutional_out_height(layer);
|
int out_h = convolutional_out_height(layer);
|
||||||
int out_w = convolutional_out_width(layer);
|
int out_w = convolutional_out_width(layer);
|
||||||
@ -160,16 +156,16 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
|
|||||||
float *c = layer.output;
|
float *c = layer.output;
|
||||||
|
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
im2col_cpu(in, layer.c, layer.h, layer.w,
|
im2col_cpu(state.input, layer.c, layer.h, layer.w,
|
||||||
layer.size, layer.stride, layer.pad, b);
|
layer.size, layer.stride, layer.pad, b);
|
||||||
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||||
c += n*m;
|
c += n*m;
|
||||||
in += layer.c*layer.h*layer.w;
|
state.input += layer.c*layer.h*layer.w;
|
||||||
}
|
}
|
||||||
activate_array(layer.output, m*n*layer.batch, layer.activation);
|
activate_array(layer.output, m*n*layer.batch, layer.activation);
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_convolutional_layer(convolutional_layer layer, float *in, float *delta)
|
void backward_convolutional_layer(convolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
float alpha = 1./layer.batch;
|
float alpha = 1./layer.batch;
|
||||||
int i;
|
int i;
|
||||||
@ -181,40 +177,40 @@ void backward_convolutional_layer(convolutional_layer layer, float *in, float *d
|
|||||||
gradient_array(layer.output, m*k*layer.batch, layer.activation, layer.delta);
|
gradient_array(layer.output, m*k*layer.batch, layer.activation, layer.delta);
|
||||||
backward_bias(layer.bias_updates, layer.delta, layer.batch, layer.n, k);
|
backward_bias(layer.bias_updates, layer.delta, layer.batch, layer.n, k);
|
||||||
|
|
||||||
if(delta) memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
||||||
|
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
float *a = layer.delta + i*m*k;
|
float *a = layer.delta + i*m*k;
|
||||||
float *b = layer.col_image;
|
float *b = layer.col_image;
|
||||||
float *c = layer.filter_updates;
|
float *c = layer.filter_updates;
|
||||||
|
|
||||||
float *im = in+i*layer.c*layer.h*layer.w;
|
float *im = state.input+i*layer.c*layer.h*layer.w;
|
||||||
|
|
||||||
im2col_cpu(im, layer.c, layer.h, layer.w,
|
im2col_cpu(im, layer.c, layer.h, layer.w,
|
||||||
layer.size, layer.stride, layer.pad, b);
|
layer.size, layer.stride, layer.pad, b);
|
||||||
gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
|
gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
|
||||||
|
|
||||||
if(delta){
|
if(state.delta){
|
||||||
a = layer.filters;
|
a = layer.filters;
|
||||||
b = layer.delta + i*m*k;
|
b = layer.delta + i*m*k;
|
||||||
c = layer.col_image;
|
c = layer.col_image;
|
||||||
|
|
||||||
gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);
|
gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);
|
||||||
|
|
||||||
col2im_cpu(layer.col_image, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta+i*layer.c*layer.h*layer.w);
|
col2im_cpu(layer.col_image, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, state.delta+i*layer.c*layer.h*layer.w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_convolutional_layer(convolutional_layer layer)
|
void update_convolutional_layer(convolutional_layer layer, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
int size = layer.size*layer.size*layer.c*layer.n;
|
int size = layer.size*layer.size*layer.c*layer.n;
|
||||||
axpy_cpu(layer.n, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1);
|
axpy_cpu(layer.n, learning_rate, layer.bias_updates, 1, layer.biases, 1);
|
||||||
scal_cpu(layer.n, layer.momentum, layer.bias_updates, 1);
|
scal_cpu(layer.n, momentum, layer.bias_updates, 1);
|
||||||
|
|
||||||
axpy_cpu(size, -layer.decay, layer.filters, 1, layer.filter_updates, 1);
|
axpy_cpu(size, -decay, layer.filters, 1, layer.filter_updates, 1);
|
||||||
axpy_cpu(size, layer.learning_rate, layer.filter_updates, 1, layer.filters, 1);
|
axpy_cpu(size, learning_rate, layer.filter_updates, 1, layer.filters, 1);
|
||||||
scal_cpu(size, layer.momentum, layer.filter_updates, 1);
|
scal_cpu(size, momentum, layer.filter_updates, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,14 +2,11 @@
|
|||||||
#define CONVOLUTIONAL_LAYER_H
|
#define CONVOLUTIONAL_LAYER_H
|
||||||
|
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
|
#include "params.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "activations.h"
|
#include "activations.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
float learning_rate;
|
|
||||||
float momentum;
|
|
||||||
float decay;
|
|
||||||
|
|
||||||
int batch;
|
int batch;
|
||||||
int h,w,c;
|
int h,w,c;
|
||||||
int n;
|
int n;
|
||||||
@ -42,9 +39,9 @@ typedef struct {
|
|||||||
} convolutional_layer;
|
} convolutional_layer;
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_convolutional_layer_gpu(convolutional_layer layer, float * in);
|
void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
||||||
void backward_convolutional_layer_gpu(convolutional_layer layer, float * in, float * delta_gpu);
|
void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
|
||||||
void update_convolutional_layer_gpu(convolutional_layer layer);
|
void update_convolutional_layer_gpu(convolutional_layer layer, float learning_rate, float momentum, float decay);
|
||||||
|
|
||||||
void push_convolutional_layer(convolutional_layer layer);
|
void push_convolutional_layer(convolutional_layer layer);
|
||||||
void pull_convolutional_layer(convolutional_layer layer);
|
void pull_convolutional_layer(convolutional_layer layer);
|
||||||
@ -53,13 +50,13 @@ void bias_output_gpu(float *output, float *biases, int batch, int n, int size);
|
|||||||
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
|
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay);
|
convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation);
|
||||||
void resize_convolutional_layer(convolutional_layer *layer, int h, int w);
|
void resize_convolutional_layer(convolutional_layer *layer, int h, int w);
|
||||||
void forward_convolutional_layer(const convolutional_layer layer, float *in);
|
void forward_convolutional_layer(const convolutional_layer layer, network_state state);
|
||||||
void update_convolutional_layer(convolutional_layer layer);
|
void update_convolutional_layer(convolutional_layer layer, float learning_rate, float momentum, float decay);
|
||||||
image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters);
|
image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters);
|
||||||
|
|
||||||
void backward_convolutional_layer(convolutional_layer layer, float *in, float *delta);
|
void backward_convolutional_layer(convolutional_layer layer, network_state state);
|
||||||
|
|
||||||
void bias_output(float *output, float *biases, int batch, int n, int size);
|
void bias_output(float *output, float *biases, int batch, int n, int size);
|
||||||
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
|
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
|
||||||
|
@ -47,48 +47,36 @@ void push_cost_layer(cost_layer layer)
|
|||||||
cuda_push_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs);
|
cuda_push_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_cost_layer(cost_layer layer, float *input, float *truth)
|
void forward_cost_layer(cost_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
if (!truth) return;
|
if (!state.truth) return;
|
||||||
copy_cpu(layer.batch*layer.inputs, truth, 1, layer.delta, 1);
|
copy_cpu(layer.batch*layer.inputs, state.truth, 1, layer.delta, 1);
|
||||||
axpy_cpu(layer.batch*layer.inputs, -1, input, 1, layer.delta, 1);
|
axpy_cpu(layer.batch*layer.inputs, -1, state.input, 1, layer.delta, 1);
|
||||||
*(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1);
|
*(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1);
|
||||||
//printf("cost: %f\n", *layer.output);
|
//printf("cost: %f\n", *layer.output);
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_cost_layer(const cost_layer layer, float *input, float *delta)
|
void backward_cost_layer(const cost_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
copy_cpu(layer.batch*layer.inputs, layer.delta, 1, delta, 1);
|
copy_cpu(layer.batch*layer.inputs, layer.delta, 1, state.delta, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
|
||||||
void forward_cost_layer_gpu(cost_layer layer, float * input, float * truth)
|
void forward_cost_layer_gpu(cost_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
if (!truth) return;
|
if (!state.truth) return;
|
||||||
|
|
||||||
/*
|
copy_ongpu(layer.batch*layer.inputs, state.truth, 1, layer.delta_gpu, 1);
|
||||||
float *in = calloc(layer.inputs*layer.batch, sizeof(float));
|
axpy_ongpu(layer.batch*layer.inputs, -1, state.input, 1, layer.delta_gpu, 1);
|
||||||
float *t = calloc(layer.inputs*layer.batch, sizeof(float));
|
|
||||||
cuda_pull_array(input, in, layer.batch*layer.inputs);
|
|
||||||
cuda_pull_array(truth, t, layer.batch*layer.inputs);
|
|
||||||
forward_cost_layer(layer, in, t);
|
|
||||||
cuda_push_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs);
|
|
||||||
free(in);
|
|
||||||
free(t);
|
|
||||||
*/
|
|
||||||
|
|
||||||
copy_ongpu(layer.batch*layer.inputs, truth, 1, layer.delta_gpu, 1);
|
|
||||||
axpy_ongpu(layer.batch*layer.inputs, -1, input, 1, layer.delta_gpu, 1);
|
|
||||||
|
|
||||||
cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs);
|
cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs);
|
||||||
*(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1);
|
*(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1);
|
||||||
//printf("cost: %f\n", *layer.output);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_cost_layer_gpu(const cost_layer layer, float * input, float * delta)
|
void backward_cost_layer_gpu(const cost_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, delta, 1);
|
copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, state.delta, 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#ifndef COST_LAYER_H
|
#ifndef COST_LAYER_H
|
||||||
#define COST_LAYER_H
|
#define COST_LAYER_H
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
typedef enum{
|
typedef enum{
|
||||||
SSE
|
SSE
|
||||||
@ -21,12 +22,12 @@ typedef struct {
|
|||||||
COST_TYPE get_cost_type(char *s);
|
COST_TYPE get_cost_type(char *s);
|
||||||
char *get_cost_string(COST_TYPE a);
|
char *get_cost_string(COST_TYPE a);
|
||||||
cost_layer *make_cost_layer(int batch, int inputs, COST_TYPE type);
|
cost_layer *make_cost_layer(int batch, int inputs, COST_TYPE type);
|
||||||
void forward_cost_layer(const cost_layer layer, float *input, float *truth);
|
void forward_cost_layer(const cost_layer layer, network_state state);
|
||||||
void backward_cost_layer(const cost_layer layer, float *input, float *delta);
|
void backward_cost_layer(const cost_layer layer, network_state state);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_cost_layer_gpu(cost_layer layer, float * input, float * truth);
|
void forward_cost_layer_gpu(cost_layer layer, network_state state);
|
||||||
void backward_cost_layer_gpu(const cost_layer layer, float * input, float * delta);
|
void backward_cost_layer_gpu(const cost_layer layer, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -28,7 +28,7 @@ crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int
|
|||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_crop_layer(const crop_layer layer, int train, float *input)
|
void forward_crop_layer(const crop_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i,j,c,b,row,col;
|
int i,j,c,b,row,col;
|
||||||
int index;
|
int index;
|
||||||
@ -36,7 +36,7 @@ void forward_crop_layer(const crop_layer layer, int train, float *input)
|
|||||||
int flip = (layer.flip && rand()%2);
|
int flip = (layer.flip && rand()%2);
|
||||||
int dh = rand()%(layer.h - layer.crop_height + 1);
|
int dh = rand()%(layer.h - layer.crop_height + 1);
|
||||||
int dw = rand()%(layer.w - layer.crop_width + 1);
|
int dw = rand()%(layer.w - layer.crop_width + 1);
|
||||||
if(!train){
|
if(!state.train){
|
||||||
flip = 0;
|
flip = 0;
|
||||||
dh = (layer.h - layer.crop_height)/2;
|
dh = (layer.h - layer.crop_height)/2;
|
||||||
dw = (layer.w - layer.crop_width)/2;
|
dw = (layer.w - layer.crop_width)/2;
|
||||||
@ -52,7 +52,7 @@ void forward_crop_layer(const crop_layer layer, int train, float *input)
|
|||||||
}
|
}
|
||||||
row = i + dh;
|
row = i + dh;
|
||||||
index = col+layer.w*(row+layer.h*(c + layer.c*b));
|
index = col+layer.w*(row+layer.h*(c + layer.c*b));
|
||||||
layer.output[count++] = input[index];
|
layer.output[count++] = state.input[index];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
#define CROP_LAYER_H
|
#define CROP_LAYER_H
|
||||||
|
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int batch;
|
int batch;
|
||||||
@ -17,10 +18,10 @@ typedef struct {
|
|||||||
|
|
||||||
image get_crop_image(crop_layer layer);
|
image get_crop_image(crop_layer layer);
|
||||||
crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip);
|
crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip);
|
||||||
void forward_crop_layer(const crop_layer layer, int train, float *input);
|
void forward_crop_layer(const crop_layer layer, network_state state);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_crop_layer_gpu(crop_layer layer, int train, float *input);
|
void forward_crop_layer_gpu(crop_layer layer, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -24,12 +24,12 @@ __global__ void forward_crop_layer_kernel(float *input, int size, int c, int h,
|
|||||||
output[count] = input[index];
|
output[count] = input[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_crop_layer_gpu(crop_layer layer, int train, float *input)
|
extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int flip = (layer.flip && rand()%2);
|
int flip = (layer.flip && rand()%2);
|
||||||
int dh = rand()%(layer.h - layer.crop_height + 1);
|
int dh = rand()%(layer.h - layer.crop_height + 1);
|
||||||
int dw = rand()%(layer.w - layer.crop_width + 1);
|
int dw = rand()%(layer.w - layer.crop_width + 1);
|
||||||
if(!train){
|
if(!state.train){
|
||||||
flip = 0;
|
flip = 0;
|
||||||
dh = (layer.h - layer.crop_height)/2;
|
dh = (layer.h - layer.crop_height)/2;
|
||||||
dw = (layer.w - layer.crop_width)/2;
|
dw = (layer.w - layer.crop_width)/2;
|
||||||
@ -39,7 +39,7 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, int train, float *input
|
|||||||
dim3 dimBlock(BLOCK, 1, 1);
|
dim3 dimBlock(BLOCK, 1, 1);
|
||||||
dim3 dimGrid((size-1)/BLOCK + 1, 1, 1);
|
dim3 dimGrid((size-1)/BLOCK + 1, 1, 1);
|
||||||
|
|
||||||
forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(input, size, layer.c, layer.h, layer.w,
|
forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.c, layer.h, layer.w,
|
||||||
layer.crop_height, layer.crop_width, dh, dw, flip, layer.output_gpu);
|
layer.crop_height, layer.crop_width, dh, dw, flip, layer.output_gpu);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
86
src/data.c
86
src/data.c
@ -18,6 +18,7 @@ struct load_args{
|
|||||||
int nw;
|
int nw;
|
||||||
int jitter;
|
int jitter;
|
||||||
int classes;
|
int classes;
|
||||||
|
int background;
|
||||||
data *d;
|
data *d;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -62,17 +63,62 @@ matrix load_image_paths(char **paths, int n, int h, int w)
|
|||||||
return X;
|
return X;
|
||||||
}
|
}
|
||||||
|
|
||||||
void fill_truth_detection(char *path, float *truth, int classes, int height, int width, int num_height, int num_width, int dy, int dx, int jitter, int flip)
|
typedef struct box{
|
||||||
|
int id;
|
||||||
|
float x,y,w,h;
|
||||||
|
} box;
|
||||||
|
|
||||||
|
box *read_boxes(char *filename, int *n)
|
||||||
|
{
|
||||||
|
box *boxes = calloc(1, sizeof(box));
|
||||||
|
FILE *file = fopen(filename, "r");
|
||||||
|
if(!file) file_error(filename);
|
||||||
|
float x, y, h, w;
|
||||||
|
int id;
|
||||||
|
int count = 0;
|
||||||
|
while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
|
||||||
|
boxes = realloc(boxes, (count+1)*sizeof(box));
|
||||||
|
boxes[count].id = id;
|
||||||
|
boxes[count].x = x;
|
||||||
|
boxes[count].y = y;
|
||||||
|
boxes[count].h = h;
|
||||||
|
boxes[count].w = w;
|
||||||
|
++count;
|
||||||
|
}
|
||||||
|
fclose(file);
|
||||||
|
*n = count;
|
||||||
|
return boxes;
|
||||||
|
}
|
||||||
|
|
||||||
|
void randomize_boxes(box *b, int n)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for(i = 0; i < n; ++i){
|
||||||
|
box swap = b[i];
|
||||||
|
int index = rand()%n;
|
||||||
|
b[i] = b[index];
|
||||||
|
b[index] = swap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void fill_truth_detection(char *path, float *truth, int classes, int height, int width, int num_height, int num_width, int dy, int dx, int jitter, int flip, int background)
|
||||||
{
|
{
|
||||||
int box_height = height/num_height;
|
int box_height = height/num_height;
|
||||||
int box_width = width/num_width;
|
int box_width = width/num_width;
|
||||||
char *labelpath = find_replace(path, "VOC2012/JPEGImages", "labels");
|
char *labelpath = find_replace(path, "VOC2012/JPEGImages", "labels");
|
||||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||||
FILE *file = fopen(labelpath, "r");
|
int count = 0;
|
||||||
if(!file) file_error(labelpath);
|
box *boxes = read_boxes(labelpath, &count);
|
||||||
|
randomize_boxes(boxes, count);
|
||||||
float x, y, h, w;
|
float x, y, h, w;
|
||||||
int id;
|
int id;
|
||||||
while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
|
int i, j;
|
||||||
|
for(i = 0; i < count; ++i){
|
||||||
|
x = boxes[i].x;
|
||||||
|
y = boxes[i].y;
|
||||||
|
w = boxes[i].w;
|
||||||
|
h = boxes[i].h;
|
||||||
|
id = boxes[i].id;
|
||||||
if(flip) x = 1-x;
|
if(flip) x = 1-x;
|
||||||
x *= width + jitter;
|
x *= width + jitter;
|
||||||
y *= height + jitter;
|
y *= height + jitter;
|
||||||
@ -88,23 +134,24 @@ void fill_truth_detection(char *path, float *truth, int classes, int height, int
|
|||||||
|
|
||||||
float dw = (x - i*box_width)/box_width;
|
float dw = (x - i*box_width)/box_width;
|
||||||
float dh = (y - j*box_height)/box_height;
|
float dh = (y - j*box_height)/box_height;
|
||||||
//printf("%d %d %d %f %f\n", id, i, j, dh, dw);
|
|
||||||
int index = (i+j*num_width)*(4+classes);
|
int index = (i+j*num_width)*(4+classes+background);
|
||||||
if(truth[index+classes]) continue;
|
if(truth[index+classes+background]) continue;
|
||||||
truth[index+id] = 1;
|
truth[index+id] = 1;
|
||||||
index += classes;
|
index += classes+background;
|
||||||
truth[index++] = dh;
|
truth[index++] = dh;
|
||||||
truth[index++] = dw;
|
truth[index++] = dw;
|
||||||
truth[index++] = h*(height+jitter)/height;
|
truth[index++] = h*(height+jitter)/height;
|
||||||
truth[index++] = w*(width+jitter)/width;
|
truth[index++] = w*(width+jitter)/width;
|
||||||
}
|
}
|
||||||
int i, j;
|
free(boxes);
|
||||||
for(i = 0; i < num_height*num_width*(4+classes); i += 4+classes){
|
if(background){
|
||||||
int background = 1;
|
for(i = 0; i < num_height*num_width*(4+classes+background); i += 4+classes+background){
|
||||||
for(j = i; j < i+classes; ++j) if (truth[j]) background = 0;
|
int object = 0;
|
||||||
truth[i+classes-1] = background;
|
for(j = i; j < i+classes; ++j) if (truth[j]) object = 1;
|
||||||
|
truth[i+classes] = !object;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
fclose(file);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define NUMCHARS 37
|
#define NUMCHARS 37
|
||||||
@ -218,20 +265,20 @@ void free_data(data d)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter)
|
data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background)
|
||||||
{
|
{
|
||||||
char **random_paths = get_random_paths(paths, n, m);
|
char **random_paths = get_random_paths(paths, n, m);
|
||||||
int i;
|
int i;
|
||||||
data d;
|
data d;
|
||||||
d.shallow = 0;
|
d.shallow = 0;
|
||||||
d.X = load_image_paths(random_paths, n, h, w);
|
d.X = load_image_paths(random_paths, n, h, w);
|
||||||
int k = nh*nw*(4+classes);
|
int k = nh*nw*(4+classes+background);
|
||||||
d.y = make_matrix(n, k);
|
d.y = make_matrix(n, k);
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
int dx = rand()%jitter;
|
int dx = rand()%jitter;
|
||||||
int dy = rand()%jitter;
|
int dy = rand()%jitter;
|
||||||
int flip = rand()%2;
|
int flip = rand()%2;
|
||||||
fill_truth_detection(random_paths[i], d.y.vals[i], classes, h-jitter, w-jitter, nh, nw, dy, dx, jitter, flip);
|
fill_truth_detection(random_paths[i], d.y.vals[i], classes, h-jitter, w-jitter, nh, nw, dy, dx, jitter, flip, background);
|
||||||
image a = float_to_image(h, w, 3, d.X.vals[i]);
|
image a = float_to_image(h, w, 3, d.X.vals[i]);
|
||||||
if(flip) flip_image(a);
|
if(flip) flip_image(a);
|
||||||
jitter_image(a,h-jitter,w-jitter,dy,dx);
|
jitter_image(a,h-jitter,w-jitter,dy,dx);
|
||||||
@ -245,14 +292,14 @@ void *load_detection_thread(void *ptr)
|
|||||||
{
|
{
|
||||||
printf("Loading data: %d\n", rand());
|
printf("Loading data: %d\n", rand());
|
||||||
struct load_args a = *(struct load_args*)ptr;
|
struct load_args a = *(struct load_args*)ptr;
|
||||||
*a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.h, a.w, a.nh, a.nw, a.jitter);
|
*a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.h, a.w, a.nh, a.nw, a.jitter, a.background);
|
||||||
translate_data_rows(*a.d, -128);
|
translate_data_rows(*a.d, -128);
|
||||||
scale_data_rows(*a.d, 1./128);
|
scale_data_rows(*a.d, 1./128);
|
||||||
free(ptr);
|
free(ptr);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d)
|
pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background, data *d)
|
||||||
{
|
{
|
||||||
pthread_t thread;
|
pthread_t thread;
|
||||||
struct load_args *args = calloc(1, sizeof(struct load_args));
|
struct load_args *args = calloc(1, sizeof(struct load_args));
|
||||||
@ -265,6 +312,7 @@ pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, in
|
|||||||
args->nw = nw;
|
args->nw = nw;
|
||||||
args->classes = classes;
|
args->classes = classes;
|
||||||
args->jitter = jitter;
|
args->jitter = jitter;
|
||||||
|
args->background = background;
|
||||||
args->d = d;
|
args->d = d;
|
||||||
if(pthread_create(&thread, 0, load_detection_thread, args)) {
|
if(pthread_create(&thread, 0, load_detection_thread, args)) {
|
||||||
error("Thread creation failed");
|
error("Thread creation failed");
|
||||||
|
@ -20,8 +20,8 @@ data load_data_captcha_encode(char **paths, int n, int m, int h, int w);
|
|||||||
data load_data(char **paths, int n, int m, char **labels, int k, int h, int w);
|
data load_data(char **paths, int n, int m, char **labels, int k, int h, int w);
|
||||||
pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d);
|
pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d);
|
||||||
|
|
||||||
pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d);
|
pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background, data *d);
|
||||||
data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter);
|
data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background);
|
||||||
|
|
||||||
data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w);
|
data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w);
|
||||||
data load_cifar10_data(char *filename);
|
data load_cifar10_data(char *filename);
|
||||||
|
@ -9,7 +9,7 @@ extern "C" {
|
|||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, float *in)
|
extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int out_h = deconvolutional_out_height(layer);
|
int out_h = deconvolutional_out_height(layer);
|
||||||
@ -24,7 +24,7 @@ extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, f
|
|||||||
|
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
float *a = layer.filters_gpu;
|
float *a = layer.filters_gpu;
|
||||||
float *b = in + i*layer.c*layer.h*layer.w;
|
float *b = state.input + i*layer.c*layer.h*layer.w;
|
||||||
float *c = layer.col_image_gpu;
|
float *c = layer.col_image_gpu;
|
||||||
|
|
||||||
gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
|
gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
|
||||||
@ -34,7 +34,7 @@ extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, f
|
|||||||
activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation);
|
activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, float *in, float *delta_gpu)
|
extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
float alpha = 1./layer.batch;
|
float alpha = 1./layer.batch;
|
||||||
int out_h = deconvolutional_out_height(layer);
|
int out_h = deconvolutional_out_height(layer);
|
||||||
@ -45,14 +45,14 @@ extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer,
|
|||||||
gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu);
|
gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu);
|
||||||
backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size);
|
backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size);
|
||||||
|
|
||||||
if(delta_gpu) memset(delta_gpu, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
||||||
|
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
int m = layer.c;
|
int m = layer.c;
|
||||||
int n = layer.size*layer.size*layer.n;
|
int n = layer.size*layer.size*layer.n;
|
||||||
int k = layer.h*layer.w;
|
int k = layer.h*layer.w;
|
||||||
|
|
||||||
float *a = in + i*m*n;
|
float *a = state.input + i*m*n;
|
||||||
float *b = layer.col_image_gpu;
|
float *b = layer.col_image_gpu;
|
||||||
float *c = layer.filter_updates_gpu;
|
float *c = layer.filter_updates_gpu;
|
||||||
|
|
||||||
@ -60,14 +60,14 @@ extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer,
|
|||||||
layer.size, layer.stride, 0, b);
|
layer.size, layer.stride, 0, b);
|
||||||
gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
|
gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
|
||||||
|
|
||||||
if(delta_gpu){
|
if(state.delta){
|
||||||
int m = layer.c;
|
int m = layer.c;
|
||||||
int n = layer.h*layer.w;
|
int n = layer.h*layer.w;
|
||||||
int k = layer.size*layer.size*layer.n;
|
int k = layer.size*layer.size*layer.n;
|
||||||
|
|
||||||
float *a = layer.filters_gpu;
|
float *a = layer.filters_gpu;
|
||||||
float *b = layer.col_image_gpu;
|
float *b = layer.col_image_gpu;
|
||||||
float *c = delta_gpu + i*n*m;
|
float *c = state.delta + i*n*m;
|
||||||
|
|
||||||
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||||
}
|
}
|
||||||
@ -90,15 +90,15 @@ extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
|
|||||||
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer)
|
extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
int size = layer.size*layer.size*layer.c*layer.n;
|
int size = layer.size*layer.size*layer.c*layer.n;
|
||||||
|
|
||||||
axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
||||||
scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1);
|
scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1);
|
||||||
|
|
||||||
axpy_ongpu(size, -layer.decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1);
|
axpy_ongpu(size, -decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1);
|
||||||
axpy_ongpu(size, layer.learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1);
|
axpy_ongpu(size, learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1);
|
||||||
scal_ongpu(size, layer.momentum, layer.filter_updates_gpu, 1);
|
scal_ongpu(size, momentum, layer.filter_updates_gpu, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,15 +43,11 @@ image get_deconvolutional_delta(deconvolutional_layer layer)
|
|||||||
return float_to_image(h,w,c,layer.delta);
|
return float_to_image(h,w,c,layer.delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, float learning_rate, float momentum, float decay)
|
deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
deconvolutional_layer *layer = calloc(1, sizeof(deconvolutional_layer));
|
deconvolutional_layer *layer = calloc(1, sizeof(deconvolutional_layer));
|
||||||
|
|
||||||
layer->learning_rate = learning_rate;
|
|
||||||
layer->momentum = momentum;
|
|
||||||
layer->decay = decay;
|
|
||||||
|
|
||||||
layer->h = h;
|
layer->h = h;
|
||||||
layer->w = w;
|
layer->w = w;
|
||||||
layer->c = c;
|
layer->c = c;
|
||||||
@ -120,7 +116,7 @@ void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in)
|
void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int out_h = deconvolutional_out_height(layer);
|
int out_h = deconvolutional_out_height(layer);
|
||||||
@ -135,7 +131,7 @@ void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in)
|
|||||||
|
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
float *a = layer.filters;
|
float *a = layer.filters;
|
||||||
float *b = in + i*layer.c*layer.h*layer.w;
|
float *b = state.input + i*layer.c*layer.h*layer.w;
|
||||||
float *c = layer.col_image;
|
float *c = layer.col_image;
|
||||||
|
|
||||||
gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);
|
gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);
|
||||||
@ -145,7 +141,7 @@ void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in)
|
|||||||
activate_array(layer.output, layer.batch*layer.n*size, layer.activation);
|
activate_array(layer.output, layer.batch*layer.n*size, layer.activation);
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, float *delta)
|
void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
float alpha = 1./layer.batch;
|
float alpha = 1./layer.batch;
|
||||||
int out_h = deconvolutional_out_height(layer);
|
int out_h = deconvolutional_out_height(layer);
|
||||||
@ -156,14 +152,14 @@ void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, floa
|
|||||||
gradient_array(layer.output, size*layer.n*layer.batch, layer.activation, layer.delta);
|
gradient_array(layer.output, size*layer.n*layer.batch, layer.activation, layer.delta);
|
||||||
backward_bias(layer.bias_updates, layer.delta, layer.batch, layer.n, size);
|
backward_bias(layer.bias_updates, layer.delta, layer.batch, layer.n, size);
|
||||||
|
|
||||||
if(delta) memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
||||||
|
|
||||||
for(i = 0; i < layer.batch; ++i){
|
for(i = 0; i < layer.batch; ++i){
|
||||||
int m = layer.c;
|
int m = layer.c;
|
||||||
int n = layer.size*layer.size*layer.n;
|
int n = layer.size*layer.size*layer.n;
|
||||||
int k = layer.h*layer.w;
|
int k = layer.h*layer.w;
|
||||||
|
|
||||||
float *a = in + i*m*n;
|
float *a = state.input + i*m*n;
|
||||||
float *b = layer.col_image;
|
float *b = layer.col_image;
|
||||||
float *c = layer.filter_updates;
|
float *c = layer.filter_updates;
|
||||||
|
|
||||||
@ -171,29 +167,29 @@ void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, floa
|
|||||||
layer.size, layer.stride, 0, b);
|
layer.size, layer.stride, 0, b);
|
||||||
gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
|
gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
|
||||||
|
|
||||||
if(delta){
|
if(state.delta){
|
||||||
int m = layer.c;
|
int m = layer.c;
|
||||||
int n = layer.h*layer.w;
|
int n = layer.h*layer.w;
|
||||||
int k = layer.size*layer.size*layer.n;
|
int k = layer.size*layer.size*layer.n;
|
||||||
|
|
||||||
float *a = layer.filters;
|
float *a = layer.filters;
|
||||||
float *b = layer.col_image;
|
float *b = layer.col_image;
|
||||||
float *c = delta + i*n*m;
|
float *c = state.delta + i*n*m;
|
||||||
|
|
||||||
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_deconvolutional_layer(deconvolutional_layer layer)
|
void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
|
||||||
{
|
{
|
||||||
int size = layer.size*layer.size*layer.c*layer.n;
|
int size = layer.size*layer.size*layer.c*layer.n;
|
||||||
axpy_cpu(layer.n, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1);
|
axpy_cpu(layer.n, learning_rate, layer.bias_updates, 1, layer.biases, 1);
|
||||||
scal_cpu(layer.n, layer.momentum, layer.bias_updates, 1);
|
scal_cpu(layer.n, momentum, layer.bias_updates, 1);
|
||||||
|
|
||||||
axpy_cpu(size, -layer.decay, layer.filters, 1, layer.filter_updates, 1);
|
axpy_cpu(size, -decay, layer.filters, 1, layer.filter_updates, 1);
|
||||||
axpy_cpu(size, layer.learning_rate, layer.filter_updates, 1, layer.filters, 1);
|
axpy_cpu(size, learning_rate, layer.filter_updates, 1, layer.filters, 1);
|
||||||
scal_cpu(size, layer.momentum, layer.filter_updates, 1);
|
scal_cpu(size, momentum, layer.filter_updates, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,14 +2,11 @@
|
|||||||
#define DECONVOLUTIONAL_LAYER_H
|
#define DECONVOLUTIONAL_LAYER_H
|
||||||
|
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
|
#include "params.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "activations.h"
|
#include "activations.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
float learning_rate;
|
|
||||||
float momentum;
|
|
||||||
float decay;
|
|
||||||
|
|
||||||
int batch;
|
int batch;
|
||||||
int h,w,c;
|
int h,w,c;
|
||||||
int n;
|
int n;
|
||||||
@ -41,18 +38,18 @@ typedef struct {
|
|||||||
} deconvolutional_layer;
|
} deconvolutional_layer;
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, float * in);
|
void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
|
||||||
void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, float * in, float * delta_gpu);
|
void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
|
||||||
void update_deconvolutional_layer_gpu(deconvolutional_layer layer);
|
void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
|
||||||
void push_deconvolutional_layer(deconvolutional_layer layer);
|
void push_deconvolutional_layer(deconvolutional_layer layer);
|
||||||
void pull_deconvolutional_layer(deconvolutional_layer layer);
|
void pull_deconvolutional_layer(deconvolutional_layer layer);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, float learning_rate, float momentum, float decay);
|
deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation);
|
||||||
void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
|
void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
|
||||||
void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in);
|
void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state);
|
||||||
void update_deconvolutional_layer(deconvolutional_layer layer);
|
void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
|
||||||
void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, float *delta);
|
void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state);
|
||||||
|
|
||||||
image get_deconvolutional_image(deconvolutional_layer layer);
|
image get_deconvolutional_image(deconvolutional_layer layer);
|
||||||
image get_deconvolutional_delta(deconvolutional_layer layer);
|
image get_deconvolutional_delta(deconvolutional_layer layer);
|
||||||
|
@ -61,15 +61,16 @@ void train_detection(char *cfgfile, char *weightfile)
|
|||||||
data train, buffer;
|
data train, buffer;
|
||||||
int im_dim = 512;
|
int im_dim = 512;
|
||||||
int jitter = 64;
|
int jitter = 64;
|
||||||
int classes = 21;
|
int classes = 20;
|
||||||
pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, &buffer);
|
int background = 1;
|
||||||
|
pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, background, &buffer);
|
||||||
clock_t time;
|
clock_t time;
|
||||||
while(1){
|
while(1){
|
||||||
i += 1;
|
i += 1;
|
||||||
time=clock();
|
time=clock();
|
||||||
pthread_join(load_thread, 0);
|
pthread_join(load_thread, 0);
|
||||||
train = buffer;
|
train = buffer;
|
||||||
load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, &buffer);
|
load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, background, &buffer);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
image im = float_to_image(im_dim - jitter, im_dim-jitter, 3, train.X.vals[0]);
|
image im = float_to_image(im_dim - jitter, im_dim-jitter, 3, train.X.vals[0]);
|
||||||
@ -103,10 +104,12 @@ void validate_detection(char *cfgfile, char *weightfile)
|
|||||||
srand(time(0));
|
srand(time(0));
|
||||||
|
|
||||||
list *plist = get_paths("/home/pjreddie/data/voc/val.txt");
|
list *plist = get_paths("/home/pjreddie/data/voc/val.txt");
|
||||||
|
//list *plist = get_paths("/home/pjreddie/data/voc/train.txt");
|
||||||
char **paths = (char **)list_to_array(plist);
|
char **paths = (char **)list_to_array(plist);
|
||||||
int num_output = 1225;
|
|
||||||
int im_size = 448;
|
int im_size = 448;
|
||||||
int classes = 21;
|
int classes = 20;
|
||||||
|
int background = 0;
|
||||||
|
int num_output = 7*7*(4+classes+background);
|
||||||
|
|
||||||
int m = plist->size;
|
int m = plist->size;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
@ -130,26 +133,18 @@ void validate_detection(char *cfgfile, char *weightfile)
|
|||||||
matrix pred = network_predict_data(net, val);
|
matrix pred = network_predict_data(net, val);
|
||||||
int j, k, class;
|
int j, k, class;
|
||||||
for(j = 0; j < pred.rows; ++j){
|
for(j = 0; j < pred.rows; ++j){
|
||||||
for(k = 0; k < pred.cols; k += classes+4){
|
for(k = 0; k < pred.cols; k += classes+4+background){
|
||||||
|
for(class = 0; class < classes; ++class){
|
||||||
/*
|
int index = (k)/(classes+4+background);
|
||||||
int z;
|
|
||||||
for(z = 0; z < 25; ++z) printf("%f, ", pred.vals[j][k+z]);
|
|
||||||
printf("\n");
|
|
||||||
*/
|
|
||||||
|
|
||||||
//if (pred.vals[j][k] > .001){
|
|
||||||
for(class = 0; class < classes-1; ++class){
|
|
||||||
int index = (k)/(classes+4);
|
|
||||||
int r = index/7;
|
int r = index/7;
|
||||||
int c = index%7;
|
int c = index%7;
|
||||||
float y = (r + pred.vals[j][k+0+classes])/7.;
|
int ci = k+classes+background;
|
||||||
float x = (c + pred.vals[j][k+1+classes])/7.;
|
float y = (r + pred.vals[j][ci + 0])/7.;
|
||||||
float h = pred.vals[j][k+2+classes];
|
float x = (c + pred.vals[j][ci + 1])/7.;
|
||||||
float w = pred.vals[j][k+3+classes];
|
float h = pred.vals[j][ci + 2];
|
||||||
|
float w = pred.vals[j][ci + 3];
|
||||||
printf("%d %d %f %f %f %f %f\n", (i-1)*m/splits + j, class, pred.vals[j][k+class], y, x, h, w);
|
printf("%d %d %f %f %f %f %f\n", (i-1)*m/splits + j, class, pred.vals[j][k+class], y, x, h, w);
|
||||||
}
|
}
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,28 +39,52 @@ detection_layer *make_detection_layer(int batch, int inputs, int classes, int co
|
|||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_detection_layer(const detection_layer layer, float *in, float *truth)
|
|
||||||
|
void forward_detection_layer(const detection_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int in_i = 0;
|
int in_i = 0;
|
||||||
int out_i = 0;
|
int out_i = 0;
|
||||||
int locations = get_detection_layer_locations(layer);
|
int locations = get_detection_layer_locations(layer);
|
||||||
int i,j;
|
int i,j;
|
||||||
for(i = 0; i < layer.batch*locations; ++i){
|
for(i = 0; i < layer.batch*locations; ++i){
|
||||||
int mask = (!truth || !truth[out_i + layer.classes - 1]);
|
int mask = (!state.truth || state.truth[out_i + layer.classes + 2]);
|
||||||
float scale = 1;
|
float scale = 1;
|
||||||
if(layer.rescore) scale = in[in_i++];
|
if(layer.rescore) scale = state.input[in_i++];
|
||||||
for(j = 0; j < layer.classes; ++j){
|
for(j = 0; j < layer.classes; ++j){
|
||||||
layer.output[out_i++] = scale*in[in_i++];
|
layer.output[out_i++] = scale*state.input[in_i++];
|
||||||
|
}
|
||||||
|
if(!layer.rescore){
|
||||||
|
softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes);
|
||||||
|
activate_array(state.input+in_i, layer.coords, LOGISTIC);
|
||||||
}
|
}
|
||||||
softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes);
|
|
||||||
activate_array(in+in_i, layer.coords, LOGISTIC);
|
|
||||||
for(j = 0; j < layer.coords; ++j){
|
for(j = 0; j < layer.coords; ++j){
|
||||||
layer.output[out_i++] = mask*in[in_i++];
|
layer.output[out_i++] = mask*state.input[in_i++];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_detection_layer(const detection_layer layer, float *in, float *delta)
|
void dark_zone(detection_layer layer, int index, network_state state)
|
||||||
|
{
|
||||||
|
int size = layer.classes+layer.rescore+layer.coords;
|
||||||
|
int location = (index%(7*7*size)) / size ;
|
||||||
|
int r = location / 7;
|
||||||
|
int c = location % 7;
|
||||||
|
int class = index%size;
|
||||||
|
if(layer.rescore) --class;
|
||||||
|
int dr, dc;
|
||||||
|
for(dr = -1; dr <= 1; ++dr){
|
||||||
|
for(dc = -1; dc <= 1; ++dc){
|
||||||
|
if(!(dr || dc)) continue;
|
||||||
|
if((r + dr) > 6 || (r + dr) < 0) continue;
|
||||||
|
if((c + dc) > 6 || (c + dc) < 0) continue;
|
||||||
|
int di = (dr*7 + dc) * size;
|
||||||
|
if(state.truth[index+di]) continue;
|
||||||
|
layer.delta[index + di] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void backward_detection_layer(const detection_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int locations = get_detection_layer_locations(layer);
|
int locations = get_detection_layer_locations(layer);
|
||||||
int i,j;
|
int i,j;
|
||||||
@ -69,49 +93,68 @@ void backward_detection_layer(const detection_layer layer, float *in, float *del
|
|||||||
for(i = 0; i < layer.batch*locations; ++i){
|
for(i = 0; i < layer.batch*locations; ++i){
|
||||||
float scale = 1;
|
float scale = 1;
|
||||||
float latent_delta = 0;
|
float latent_delta = 0;
|
||||||
if(layer.rescore) scale = in[in_i++];
|
if(layer.rescore) scale = state.input[in_i++];
|
||||||
|
if(!layer.rescore){
|
||||||
|
for(j = 0; j < layer.classes-1; ++j){
|
||||||
|
if(state.truth[out_i + j]) dark_zone(layer, out_i+j, state);
|
||||||
|
}
|
||||||
|
}
|
||||||
for(j = 0; j < layer.classes; ++j){
|
for(j = 0; j < layer.classes; ++j){
|
||||||
latent_delta += in[in_i]*layer.delta[out_i];
|
latent_delta += state.input[in_i]*layer.delta[out_i];
|
||||||
delta[in_i++] = scale*layer.delta[out_i++];
|
state.delta[in_i++] = scale*layer.delta[out_i++];
|
||||||
}
|
}
|
||||||
|
|
||||||
gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i);
|
if (!layer.rescore) gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i);
|
||||||
for(j = 0; j < layer.coords; ++j){
|
for(j = 0; j < layer.coords; ++j){
|
||||||
delta[in_i++] = layer.delta[out_i++];
|
state.delta[in_i++] = layer.delta[out_i++];
|
||||||
}
|
}
|
||||||
if(layer.rescore) delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta;
|
if(layer.rescore) state.delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
|
||||||
void forward_detection_layer_gpu(const detection_layer layer, float *in, float *truth)
|
void forward_detection_layer_gpu(const detection_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int outputs = get_detection_layer_output_size(layer);
|
int outputs = get_detection_layer_output_size(layer);
|
||||||
float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float));
|
float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float));
|
||||||
float *truth_cpu = 0;
|
float *truth_cpu = 0;
|
||||||
if(truth){
|
if(state.truth){
|
||||||
truth_cpu = calloc(layer.batch*outputs, sizeof(float));
|
truth_cpu = calloc(layer.batch*outputs, sizeof(float));
|
||||||
cuda_pull_array(truth, truth_cpu, layer.batch*outputs);
|
cuda_pull_array(state.truth, truth_cpu, layer.batch*outputs);
|
||||||
}
|
}
|
||||||
cuda_pull_array(in, in_cpu, layer.batch*layer.inputs);
|
cuda_pull_array(state.input, in_cpu, layer.batch*layer.inputs);
|
||||||
forward_detection_layer(layer, in_cpu, truth_cpu);
|
network_state cpu_state;
|
||||||
|
cpu_state.train = state.train;
|
||||||
|
cpu_state.truth = truth_cpu;
|
||||||
|
cpu_state.input = in_cpu;
|
||||||
|
forward_detection_layer(layer, cpu_state);
|
||||||
cuda_push_array(layer.output_gpu, layer.output, layer.batch*outputs);
|
cuda_push_array(layer.output_gpu, layer.output, layer.batch*outputs);
|
||||||
free(in_cpu);
|
free(cpu_state.input);
|
||||||
if(truth_cpu) free(truth_cpu);
|
if(cpu_state.truth) free(cpu_state.truth);
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_detection_layer_gpu(detection_layer layer, float *in, float *delta)
|
void backward_detection_layer_gpu(detection_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int outputs = get_detection_layer_output_size(layer);
|
int outputs = get_detection_layer_output_size(layer);
|
||||||
|
|
||||||
float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float));
|
float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float));
|
||||||
float *delta_cpu = calloc(layer.batch*layer.inputs, sizeof(float));
|
float *delta_cpu = calloc(layer.batch*layer.inputs, sizeof(float));
|
||||||
|
float *truth_cpu = 0;
|
||||||
|
if(state.truth){
|
||||||
|
truth_cpu = calloc(layer.batch*outputs, sizeof(float));
|
||||||
|
cuda_pull_array(state.truth, truth_cpu, layer.batch*outputs);
|
||||||
|
}
|
||||||
|
network_state cpu_state;
|
||||||
|
cpu_state.train = state.train;
|
||||||
|
cpu_state.input = in_cpu;
|
||||||
|
cpu_state.truth = truth_cpu;
|
||||||
|
cpu_state.delta = delta_cpu;
|
||||||
|
|
||||||
cuda_pull_array(in, in_cpu, layer.batch*layer.inputs);
|
cuda_pull_array(state.input, in_cpu, layer.batch*layer.inputs);
|
||||||
cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*outputs);
|
cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*outputs);
|
||||||
backward_detection_layer(layer, in_cpu, delta_cpu);
|
backward_detection_layer(layer, cpu_state);
|
||||||
cuda_push_array(delta, delta_cpu, layer.batch*layer.inputs);
|
cuda_push_array(state.delta, delta_cpu, layer.batch*layer.inputs);
|
||||||
|
|
||||||
free(in_cpu);
|
free(in_cpu);
|
||||||
free(delta_cpu);
|
free(delta_cpu);
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#ifndef DETECTION_LAYER_H
|
#ifndef DETECTION_LAYER_H
|
||||||
#define DETECTION_LAYER_H
|
#define DETECTION_LAYER_H
|
||||||
|
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int batch;
|
int batch;
|
||||||
int inputs;
|
int inputs;
|
||||||
@ -16,13 +18,13 @@ typedef struct {
|
|||||||
} detection_layer;
|
} detection_layer;
|
||||||
|
|
||||||
detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore);
|
detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore);
|
||||||
void forward_detection_layer(const detection_layer layer, float *in, float *truth);
|
void forward_detection_layer(const detection_layer layer, network_state state);
|
||||||
void backward_detection_layer(const detection_layer layer, float *in, float *delta);
|
void backward_detection_layer(const detection_layer layer, network_state state);
|
||||||
int get_detection_layer_output_size(detection_layer layer);
|
int get_detection_layer_output_size(detection_layer layer);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_detection_layer_gpu(const detection_layer layer, float *in, float *truth);
|
void forward_detection_layer_gpu(const detection_layer layer, network_state state);
|
||||||
void backward_detection_layer_gpu(detection_layer layer, float *in, float *delta);
|
void backward_detection_layer_gpu(detection_layer layer, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#include "dropout_layer.h"
|
#include "dropout_layer.h"
|
||||||
|
#include "params.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -11,11 +12,9 @@ dropout_layer *make_dropout_layer(int batch, int inputs, float probability)
|
|||||||
layer->probability = probability;
|
layer->probability = probability;
|
||||||
layer->inputs = inputs;
|
layer->inputs = inputs;
|
||||||
layer->batch = batch;
|
layer->batch = batch;
|
||||||
layer->output = calloc(inputs*batch, sizeof(float));
|
|
||||||
layer->rand = calloc(inputs*batch, sizeof(float));
|
layer->rand = calloc(inputs*batch, sizeof(float));
|
||||||
layer->scale = 1./(1.-probability);
|
layer->scale = 1./(1.-probability);
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
layer->output_gpu = cuda_make_array(layer->output, inputs*batch);
|
|
||||||
layer->rand_gpu = cuda_make_array(layer->rand, inputs*batch);
|
layer->rand_gpu = cuda_make_array(layer->rand, inputs*batch);
|
||||||
#endif
|
#endif
|
||||||
return layer;
|
return layer;
|
||||||
@ -23,36 +22,34 @@ dropout_layer *make_dropout_layer(int batch, int inputs, float probability)
|
|||||||
|
|
||||||
void resize_dropout_layer(dropout_layer *layer, int inputs)
|
void resize_dropout_layer(dropout_layer *layer, int inputs)
|
||||||
{
|
{
|
||||||
layer->output = realloc(layer->output, layer->inputs*layer->batch*sizeof(float));
|
|
||||||
layer->rand = realloc(layer->rand, layer->inputs*layer->batch*sizeof(float));
|
layer->rand = realloc(layer->rand, layer->inputs*layer->batch*sizeof(float));
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
cuda_free(layer->output_gpu);
|
|
||||||
cuda_free(layer->rand_gpu);
|
cuda_free(layer->rand_gpu);
|
||||||
|
|
||||||
layer->output_gpu = cuda_make_array(layer->output, inputs*layer->batch);
|
|
||||||
layer->rand_gpu = cuda_make_array(layer->rand, inputs*layer->batch);
|
layer->rand_gpu = cuda_make_array(layer->rand, inputs*layer->batch);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_dropout_layer(dropout_layer layer, float *input)
|
void forward_dropout_layer(dropout_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
if (!state.train) return;
|
||||||
for(i = 0; i < layer.batch * layer.inputs; ++i){
|
for(i = 0; i < layer.batch * layer.inputs; ++i){
|
||||||
float r = rand_uniform();
|
float r = rand_uniform();
|
||||||
layer.rand[i] = r;
|
layer.rand[i] = r;
|
||||||
if(r < layer.probability) layer.output[i] = 0;
|
if(r < layer.probability) state.input[i] = 0;
|
||||||
else layer.output[i] = input[i]*layer.scale;
|
else state.input[i] *= layer.scale;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_dropout_layer(dropout_layer layer, float *delta)
|
void backward_dropout_layer(dropout_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
if(!delta) return;
|
if(!state.delta) return;
|
||||||
for(i = 0; i < layer.batch * layer.inputs; ++i){
|
for(i = 0; i < layer.batch * layer.inputs; ++i){
|
||||||
float r = layer.rand[i];
|
float r = layer.rand[i];
|
||||||
if(r < layer.probability) delta[i] = 0;
|
if(r < layer.probability) state.delta[i] = 0;
|
||||||
else delta[i] *= layer.scale;
|
else state.delta[i] *= layer.scale;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#ifndef DROPOUT_LAYER_H
|
#ifndef DROPOUT_LAYER_H
|
||||||
#define DROPOUT_LAYER_H
|
#define DROPOUT_LAYER_H
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
typedef struct{
|
typedef struct{
|
||||||
int batch;
|
int batch;
|
||||||
@ -7,22 +8,20 @@ typedef struct{
|
|||||||
float probability;
|
float probability;
|
||||||
float scale;
|
float scale;
|
||||||
float *rand;
|
float *rand;
|
||||||
float *output;
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
float * rand_gpu;
|
float * rand_gpu;
|
||||||
float * output_gpu;
|
|
||||||
#endif
|
#endif
|
||||||
} dropout_layer;
|
} dropout_layer;
|
||||||
|
|
||||||
dropout_layer *make_dropout_layer(int batch, int inputs, float probability);
|
dropout_layer *make_dropout_layer(int batch, int inputs, float probability);
|
||||||
|
|
||||||
void forward_dropout_layer(dropout_layer layer, float *input);
|
void forward_dropout_layer(dropout_layer layer, network_state state);
|
||||||
void backward_dropout_layer(dropout_layer layer, float *delta);
|
void backward_dropout_layer(dropout_layer layer, network_state state);
|
||||||
void resize_dropout_layer(dropout_layer *layer, int inputs);
|
void resize_dropout_layer(dropout_layer *layer, int inputs);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_dropout_layer_gpu(dropout_layer layer, float * input);
|
void forward_dropout_layer_gpu(dropout_layer layer, network_state state);
|
||||||
void backward_dropout_layer_gpu(dropout_layer layer, float * delta);
|
void backward_dropout_layer_gpu(dropout_layer layer, network_state state);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,32 +2,32 @@ extern "C" {
|
|||||||
#include "dropout_layer.h"
|
#include "dropout_layer.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "params.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale, float *output)
|
__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale)
|
||||||
{
|
{
|
||||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||||
if(id < size) output[id] = (rand[id] < prob) ? 0 : input[id]*scale;
|
if(id < size) input[id] = (rand[id] < prob) ? 0 : input[id]*scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_dropout_layer_gpu(dropout_layer layer, float * input)
|
extern "C" void forward_dropout_layer_gpu(dropout_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
|
if (!state.train) return;
|
||||||
int j;
|
int j;
|
||||||
int size = layer.inputs*layer.batch;
|
int size = layer.inputs*layer.batch;
|
||||||
for(j = 0; j < size; ++j) layer.rand[j] = rand_uniform();
|
for(j = 0; j < size; ++j) layer.rand[j] = rand_uniform();
|
||||||
cuda_push_array(layer.rand_gpu, layer.rand, layer.inputs*layer.batch);
|
cuda_push_array(layer.rand_gpu, layer.rand, layer.inputs*layer.batch);
|
||||||
|
|
||||||
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(input, size, layer.rand_gpu, layer.probability,
|
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale);
|
||||||
layer.scale, layer.output_gpu);
|
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_dropout_layer_gpu(dropout_layer layer, float *delta)
|
extern "C" void backward_dropout_layer_gpu(dropout_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
if(!delta) return;
|
if(!state.delta) return;
|
||||||
int size = layer.inputs*layer.batch;
|
int size = layer.inputs*layer.batch;
|
||||||
|
|
||||||
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(delta, size, layer.rand_gpu, layer.probability,
|
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.delta, size, layer.rand_gpu, layer.probability, layer.scale);
|
||||||
layer.scale, delta);
|
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
@ -1,25 +0,0 @@
|
|||||||
#include "freeweight_layer.h"
|
|
||||||
#include "stdlib.h"
|
|
||||||
#include "stdio.h"
|
|
||||||
|
|
||||||
freeweight_layer *make_freeweight_layer(int batch, int inputs)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Freeweight Layer: %d inputs\n", inputs);
|
|
||||||
freeweight_layer *layer = calloc(1, sizeof(freeweight_layer));
|
|
||||||
layer->inputs = inputs;
|
|
||||||
layer->batch = batch;
|
|
||||||
return layer;
|
|
||||||
}
|
|
||||||
|
|
||||||
void forward_freeweight_layer(freeweight_layer layer, float *input)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for(i = 0; i < layer.batch * layer.inputs; ++i){
|
|
||||||
input[i] *= 2.*((float)rand()/RAND_MAX);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void backward_freeweight_layer(freeweight_layer layer, float *input, float *delta)
|
|
||||||
{
|
|
||||||
// Don't do shit LULZ
|
|
||||||
}
|
|
@ -1,14 +0,0 @@
|
|||||||
#ifndef FREEWEIGHT_LAYER_H
|
|
||||||
#define FREEWEIGHT_LAYER_H
|
|
||||||
|
|
||||||
typedef struct{
|
|
||||||
int batch;
|
|
||||||
int inputs;
|
|
||||||
} freeweight_layer;
|
|
||||||
|
|
||||||
freeweight_layer *make_freeweight_layer(int batch, int inputs);
|
|
||||||
|
|
||||||
void forward_freeweight_layer(freeweight_layer layer, float *input);
|
|
||||||
void backward_freeweight_layer(freeweight_layer layer, float *input, float *delta);
|
|
||||||
|
|
||||||
#endif
|
|
@ -58,7 +58,7 @@ void resize_maxpool_layer(maxpool_layer *layer, int h, int w)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_maxpool_layer(const maxpool_layer layer, float *input)
|
void forward_maxpool_layer(const maxpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int b,i,j,k,l,m;
|
int b,i,j,k,l,m;
|
||||||
int w_offset = (-layer.size-1)/2 + 1;
|
int w_offset = (-layer.size-1)/2 + 1;
|
||||||
@ -82,7 +82,7 @@ void forward_maxpool_layer(const maxpool_layer layer, float *input)
|
|||||||
int index = cur_w + layer.w*(cur_h + layer.h*(k + b*layer.c));
|
int index = cur_w + layer.w*(cur_h + layer.h*(k + b*layer.c));
|
||||||
int valid = (cur_h >= 0 && cur_h < layer.h &&
|
int valid = (cur_h >= 0 && cur_h < layer.h &&
|
||||||
cur_w >= 0 && cur_w < layer.w);
|
cur_w >= 0 && cur_w < layer.w);
|
||||||
float val = (valid != 0) ? input[index] : -FLT_MAX;
|
float val = (valid != 0) ? state.input[index] : -FLT_MAX;
|
||||||
max_i = (val > max) ? index : max_i;
|
max_i = (val > max) ? index : max_i;
|
||||||
max = (val > max) ? val : max;
|
max = (val > max) ? val : max;
|
||||||
}
|
}
|
||||||
@ -95,16 +95,16 @@ void forward_maxpool_layer(const maxpool_layer layer, float *input)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_maxpool_layer(const maxpool_layer layer, float *delta)
|
void backward_maxpool_layer(const maxpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int h = (layer.h-1)/layer.stride + 1;
|
int h = (layer.h-1)/layer.stride + 1;
|
||||||
int w = (layer.w-1)/layer.stride + 1;
|
int w = (layer.w-1)/layer.stride + 1;
|
||||||
int c = layer.c;
|
int c = layer.c;
|
||||||
memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
||||||
for(i = 0; i < h*w*c*layer.batch; ++i){
|
for(i = 0; i < h*w*c*layer.batch; ++i){
|
||||||
int index = layer.indexes[i];
|
int index = layer.indexes[i];
|
||||||
delta[index] += layer.delta[i];
|
state.delta[index] += layer.delta[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
#define MAXPOOL_LAYER_H
|
#define MAXPOOL_LAYER_H
|
||||||
|
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
#include "params.h"
|
||||||
#include "cuda.h"
|
#include "cuda.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -22,12 +23,12 @@ typedef struct {
|
|||||||
image get_maxpool_image(maxpool_layer layer);
|
image get_maxpool_image(maxpool_layer layer);
|
||||||
maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int size, int stride);
|
maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int size, int stride);
|
||||||
void resize_maxpool_layer(maxpool_layer *layer, int h, int w);
|
void resize_maxpool_layer(maxpool_layer *layer, int h, int w);
|
||||||
void forward_maxpool_layer(const maxpool_layer layer, float *input);
|
void forward_maxpool_layer(const maxpool_layer layer, network_state state);
|
||||||
void backward_maxpool_layer(const maxpool_layer layer, float *delta);
|
void backward_maxpool_layer(const maxpool_layer layer, network_state state);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_maxpool_layer_gpu(maxpool_layer layer, float * input);
|
void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state);
|
||||||
void backward_maxpool_layer_gpu(maxpool_layer layer, float * delta);
|
void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -80,7 +80,7 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
|
|||||||
prev_delta[index] = d;
|
prev_delta[index] = d;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, float *input)
|
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int h = (layer.h-1)/layer.stride + 1;
|
int h = (layer.h-1)/layer.stride + 1;
|
||||||
int w = (layer.w-1)/layer.stride + 1;
|
int w = (layer.w-1)/layer.stride + 1;
|
||||||
@ -88,15 +88,15 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, float *input)
|
|||||||
|
|
||||||
size_t n = h*w*c*layer.batch;
|
size_t n = h*w*c*layer.batch;
|
||||||
|
|
||||||
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, input, layer.output_gpu, layer.indexes_gpu);
|
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, float * delta)
|
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
size_t n = layer.h*layer.w*layer.c*layer.batch;
|
size_t n = layer.h*layer.w*layer.c*layer.batch;
|
||||||
|
|
||||||
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, delta, layer.indexes_gpu);
|
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
197
src/network.c
197
src/network.c
@ -4,6 +4,7 @@
|
|||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "data.h"
|
#include "data.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
#include "crop_layer.h"
|
#include "crop_layer.h"
|
||||||
#include "connected_layer.h"
|
#include "connected_layer.h"
|
||||||
@ -13,7 +14,6 @@
|
|||||||
#include "maxpool_layer.h"
|
#include "maxpool_layer.h"
|
||||||
#include "cost_layer.h"
|
#include "cost_layer.h"
|
||||||
#include "normalization_layer.h"
|
#include "normalization_layer.h"
|
||||||
#include "freeweight_layer.h"
|
|
||||||
#include "softmax_layer.h"
|
#include "softmax_layer.h"
|
||||||
#include "dropout_layer.h"
|
#include "dropout_layer.h"
|
||||||
|
|
||||||
@ -36,8 +36,6 @@ char *get_layer_string(LAYER_TYPE a)
|
|||||||
return "normalization";
|
return "normalization";
|
||||||
case DROPOUT:
|
case DROPOUT:
|
||||||
return "dropout";
|
return "dropout";
|
||||||
case FREEWEIGHT:
|
|
||||||
return "freeweight";
|
|
||||||
case CROP:
|
case CROP:
|
||||||
return "crop";
|
return "crop";
|
||||||
case COST:
|
case COST:
|
||||||
@ -48,16 +46,18 @@ char *get_layer_string(LAYER_TYPE a)
|
|||||||
return "none";
|
return "none";
|
||||||
}
|
}
|
||||||
|
|
||||||
network make_network(int n, int batch)
|
network make_network(int n)
|
||||||
{
|
{
|
||||||
network net;
|
network net;
|
||||||
net.n = n;
|
net.n = n;
|
||||||
net.batch = batch;
|
|
||||||
net.layers = calloc(net.n, sizeof(void *));
|
net.layers = calloc(net.n, sizeof(void *));
|
||||||
net.types = calloc(net.n, sizeof(LAYER_TYPE));
|
net.types = calloc(net.n, sizeof(LAYER_TYPE));
|
||||||
net.outputs = 0;
|
net.outputs = 0;
|
||||||
net.output = 0;
|
net.output = 0;
|
||||||
net.seen = 0;
|
net.seen = 0;
|
||||||
|
net.batch = 0;
|
||||||
|
net.inputs = 0;
|
||||||
|
net.h = net.w = net.c = 0;
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
net.input_gpu = calloc(1, sizeof(float *));
|
net.input_gpu = calloc(1, sizeof(float *));
|
||||||
net.truth_gpu = calloc(1, sizeof(float *));
|
net.truth_gpu = calloc(1, sizeof(float *));
|
||||||
@ -65,68 +65,41 @@ network make_network(int n, int batch)
|
|||||||
return net;
|
return net;
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_network(network net, float *input, float *truth, int train)
|
void forward_network(network net, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < net.n; ++i){
|
for(i = 0; i < net.n; ++i){
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
forward_convolutional_layer(*(convolutional_layer *)net.layers[i], state);
|
||||||
forward_convolutional_layer(layer, input);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DECONVOLUTIONAL){
|
else if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
forward_deconvolutional_layer(*(deconvolutional_layer *)net.layers[i], state);
|
||||||
forward_deconvolutional_layer(layer, input);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DETECTION){
|
else if(net.types[i] == DETECTION){
|
||||||
detection_layer layer = *(detection_layer *)net.layers[i];
|
forward_detection_layer(*(detection_layer *)net.layers[i], state);
|
||||||
forward_detection_layer(layer, input, truth);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CONNECTED){
|
else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
forward_connected_layer(*(connected_layer *)net.layers[i], state);
|
||||||
forward_connected_layer(layer, input);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CROP){
|
else if(net.types[i] == CROP){
|
||||||
crop_layer layer = *(crop_layer *)net.layers[i];
|
forward_crop_layer(*(crop_layer *)net.layers[i], state);
|
||||||
forward_crop_layer(layer, train, input);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == COST){
|
else if(net.types[i] == COST){
|
||||||
cost_layer layer = *(cost_layer *)net.layers[i];
|
forward_cost_layer(*(cost_layer *)net.layers[i], state);
|
||||||
forward_cost_layer(layer, input, truth);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == SOFTMAX){
|
else if(net.types[i] == SOFTMAX){
|
||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
forward_softmax_layer(*(softmax_layer *)net.layers[i], state);
|
||||||
forward_softmax_layer(layer, input);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == MAXPOOL){
|
else if(net.types[i] == MAXPOOL){
|
||||||
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
|
forward_maxpool_layer(*(maxpool_layer *)net.layers[i], state);
|
||||||
forward_maxpool_layer(layer, input);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == NORMALIZATION){
|
else if(net.types[i] == NORMALIZATION){
|
||||||
normalization_layer layer = *(normalization_layer *)net.layers[i];
|
forward_normalization_layer(*(normalization_layer *)net.layers[i], state);
|
||||||
forward_normalization_layer(layer, input);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DROPOUT){
|
else if(net.types[i] == DROPOUT){
|
||||||
if(!train) continue;
|
forward_dropout_layer(*(dropout_layer *)net.layers[i], state);
|
||||||
dropout_layer layer = *(dropout_layer *)net.layers[i];
|
|
||||||
forward_dropout_layer(layer, input);
|
|
||||||
input = layer.output;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == FREEWEIGHT){
|
state.input = get_network_output_layer(net, i);
|
||||||
if(!train) continue;
|
|
||||||
//freeweight_layer layer = *(freeweight_layer *)net.layers[i];
|
|
||||||
//forward_freeweight_layer(layer, input);
|
|
||||||
}
|
|
||||||
//char buff[256];
|
|
||||||
//sprintf(buff, "layer %d", i);
|
|
||||||
//cuda_compare(get_network_output_gpu_layer(net, i), input, get_network_output_size_layer(net, i)*net.batch, buff);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,15 +109,15 @@ void update_network(network net)
|
|||||||
for(i = 0; i < net.n; ++i){
|
for(i = 0; i < net.n; ++i){
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
||||||
update_convolutional_layer(layer);
|
update_convolutional_layer(layer, net.learning_rate, net.momentum, net.decay);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DECONVOLUTIONAL){
|
else if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
||||||
update_deconvolutional_layer(layer);
|
update_deconvolutional_layer(layer, net.learning_rate, net.momentum, net.decay);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CONNECTED){
|
else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
connected_layer layer = *(connected_layer *)net.layers[i];
|
||||||
update_connected_layer(layer);
|
update_connected_layer(layer, net.learning_rate, net.momentum, net.decay);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -152,37 +125,27 @@ void update_network(network net)
|
|||||||
float *get_network_output_layer(network net, int i)
|
float *get_network_output_layer(network net, int i)
|
||||||
{
|
{
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
return ((convolutional_layer *)net.layers[i]) -> output;
|
||||||
return layer.output;
|
|
||||||
} else if(net.types[i] == DECONVOLUTIONAL){
|
} else if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
return ((deconvolutional_layer *)net.layers[i]) -> output;
|
||||||
return layer.output;
|
|
||||||
} else if(net.types[i] == MAXPOOL){
|
} else if(net.types[i] == MAXPOOL){
|
||||||
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
|
return ((maxpool_layer *)net.layers[i]) -> output;
|
||||||
return layer.output;
|
|
||||||
} else if(net.types[i] == DETECTION){
|
} else if(net.types[i] == DETECTION){
|
||||||
detection_layer layer = *(detection_layer *)net.layers[i];
|
return ((detection_layer *)net.layers[i]) -> output;
|
||||||
return layer.output;
|
|
||||||
} else if(net.types[i] == SOFTMAX){
|
} else if(net.types[i] == SOFTMAX){
|
||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
return ((softmax_layer *)net.layers[i]) -> output;
|
||||||
return layer.output;
|
|
||||||
} else if(net.types[i] == DROPOUT){
|
} else if(net.types[i] == DROPOUT){
|
||||||
dropout_layer layer = *(dropout_layer *)net.layers[i];
|
|
||||||
return layer.output;
|
|
||||||
} else if(net.types[i] == FREEWEIGHT){
|
|
||||||
return get_network_output_layer(net, i-1);
|
return get_network_output_layer(net, i-1);
|
||||||
} else if(net.types[i] == CONNECTED){
|
} else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
return ((connected_layer *)net.layers[i]) -> output;
|
||||||
return layer.output;
|
|
||||||
} else if(net.types[i] == CROP){
|
} else if(net.types[i] == CROP){
|
||||||
crop_layer layer = *(crop_layer *)net.layers[i];
|
return ((crop_layer *)net.layers[i]) -> output;
|
||||||
return layer.output;
|
|
||||||
} else if(net.types[i] == NORMALIZATION){
|
} else if(net.types[i] == NORMALIZATION){
|
||||||
normalization_layer layer = *(normalization_layer *)net.layers[i];
|
return ((normalization_layer *)net.layers[i]) -> output;
|
||||||
return layer.output;
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
float *get_network_output(network net)
|
float *get_network_output(network net)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -210,8 +173,6 @@ float *get_network_delta_layer(network net, int i)
|
|||||||
} else if(net.types[i] == DROPOUT){
|
} else if(net.types[i] == DROPOUT){
|
||||||
if(i == 0) return 0;
|
if(i == 0) return 0;
|
||||||
return get_network_delta_layer(net, i-1);
|
return get_network_delta_layer(net, i-1);
|
||||||
} else if(net.types[i] == FREEWEIGHT){
|
|
||||||
return get_network_delta_layer(net, i-1);
|
|
||||||
} else if(net.types[i] == CONNECTED){
|
} else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
connected_layer layer = *(connected_layer *)net.layers[i];
|
||||||
return layer.delta;
|
return layer.delta;
|
||||||
@ -257,54 +218,53 @@ int get_predicted_class_network(network net)
|
|||||||
return max_index(out, k);
|
return max_index(out, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_network(network net, float *input, float *truth)
|
void backward_network(network net, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
float *prev_input;
|
float *original_input = state.input;
|
||||||
float *prev_delta;
|
|
||||||
for(i = net.n-1; i >= 0; --i){
|
for(i = net.n-1; i >= 0; --i){
|
||||||
if(i == 0){
|
if(i == 0){
|
||||||
prev_input = input;
|
state.input = original_input;
|
||||||
prev_delta = 0;
|
state.delta = 0;
|
||||||
}else{
|
}else{
|
||||||
prev_input = get_network_output_layer(net, i-1);
|
state.input = get_network_output_layer(net, i-1);
|
||||||
prev_delta = get_network_delta_layer(net, i-1);
|
state.delta = get_network_delta_layer(net, i-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
||||||
backward_convolutional_layer(layer, prev_input, prev_delta);
|
backward_convolutional_layer(layer, state);
|
||||||
} else if(net.types[i] == DECONVOLUTIONAL){
|
} else if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
||||||
backward_deconvolutional_layer(layer, prev_input, prev_delta);
|
backward_deconvolutional_layer(layer, state);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == MAXPOOL){
|
else if(net.types[i] == MAXPOOL){
|
||||||
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
|
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
|
||||||
if(i != 0) backward_maxpool_layer(layer, prev_delta);
|
if(i != 0) backward_maxpool_layer(layer, state);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DROPOUT){
|
else if(net.types[i] == DROPOUT){
|
||||||
dropout_layer layer = *(dropout_layer *)net.layers[i];
|
dropout_layer layer = *(dropout_layer *)net.layers[i];
|
||||||
backward_dropout_layer(layer, prev_delta);
|
backward_dropout_layer(layer, state);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DETECTION){
|
else if(net.types[i] == DETECTION){
|
||||||
detection_layer layer = *(detection_layer *)net.layers[i];
|
detection_layer layer = *(detection_layer *)net.layers[i];
|
||||||
backward_detection_layer(layer, prev_input, prev_delta);
|
backward_detection_layer(layer, state);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == NORMALIZATION){
|
else if(net.types[i] == NORMALIZATION){
|
||||||
normalization_layer layer = *(normalization_layer *)net.layers[i];
|
normalization_layer layer = *(normalization_layer *)net.layers[i];
|
||||||
if(i != 0) backward_normalization_layer(layer, prev_input, prev_delta);
|
if(i != 0) backward_normalization_layer(layer, state);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == SOFTMAX){
|
else if(net.types[i] == SOFTMAX){
|
||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
||||||
if(i != 0) backward_softmax_layer(layer, prev_delta);
|
if(i != 0) backward_softmax_layer(layer, state);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CONNECTED){
|
else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
connected_layer layer = *(connected_layer *)net.layers[i];
|
||||||
backward_connected_layer(layer, prev_input, prev_delta);
|
backward_connected_layer(layer, state);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == COST){
|
else if(net.types[i] == COST){
|
||||||
cost_layer layer = *(cost_layer *)net.layers[i];
|
cost_layer layer = *(cost_layer *)net.layers[i];
|
||||||
backward_cost_layer(layer, prev_input, prev_delta);
|
backward_cost_layer(layer, state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -314,8 +274,12 @@ float train_network_datum(network net, float *x, float *y)
|
|||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0) return train_network_datum_gpu(net, x, y);
|
if(gpu_index >= 0) return train_network_datum_gpu(net, x, y);
|
||||||
#endif
|
#endif
|
||||||
forward_network(net, x, y, 1);
|
network_state state;
|
||||||
backward_network(net, x, y);
|
state.input = x;
|
||||||
|
state.truth = y;
|
||||||
|
state.train = 1;
|
||||||
|
forward_network(net, state);
|
||||||
|
backward_network(net, state);
|
||||||
float error = get_network_cost(net);
|
float error = get_network_cost(net);
|
||||||
update_network(net);
|
update_network(net);
|
||||||
return error;
|
return error;
|
||||||
@ -361,15 +325,17 @@ float train_network(network net, data d)
|
|||||||
float train_network_batch(network net, data d, int n)
|
float train_network_batch(network net, data d, int n)
|
||||||
{
|
{
|
||||||
int i,j;
|
int i,j;
|
||||||
|
network_state state;
|
||||||
|
state.train = 1;
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
int batch = 2;
|
int batch = 2;
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
for(j = 0; j < batch; ++j){
|
for(j = 0; j < batch; ++j){
|
||||||
int index = rand()%d.X.rows;
|
int index = rand()%d.X.rows;
|
||||||
float *x = d.X.vals[index];
|
state.input = d.X.vals[index];
|
||||||
float *y = d.y.vals[index];
|
state.truth = d.y.vals[index];
|
||||||
forward_network(net, x, y, 1);
|
forward_network(net, state);
|
||||||
backward_network(net, x, y);
|
backward_network(net, state);
|
||||||
sum += get_network_cost(net);
|
sum += get_network_cost(net);
|
||||||
}
|
}
|
||||||
update_network(net);
|
update_network(net);
|
||||||
@ -377,28 +343,6 @@ float train_network_batch(network net, data d, int n)
|
|||||||
return (float)sum/(n*batch);
|
return (float)sum/(n*batch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_learning_network(network *net, float rate, float momentum, float decay)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
net->learning_rate=rate;
|
|
||||||
net->momentum = momentum;
|
|
||||||
net->decay = decay;
|
|
||||||
for(i = 0; i < net->n; ++i){
|
|
||||||
if(net->types[i] == CONVOLUTIONAL){
|
|
||||||
convolutional_layer *layer = (convolutional_layer *)net->layers[i];
|
|
||||||
layer->learning_rate=rate;
|
|
||||||
layer->momentum = momentum;
|
|
||||||
layer->decay = decay;
|
|
||||||
}
|
|
||||||
else if(net->types[i] == CONNECTED){
|
|
||||||
connected_layer *layer = (connected_layer *)net->layers[i];
|
|
||||||
layer->learning_rate=rate;
|
|
||||||
layer->momentum = momentum;
|
|
||||||
layer->decay = decay;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_batch_network(network *net, int b)
|
void set_batch_network(network *net, int b)
|
||||||
{
|
{
|
||||||
net->batch = b;
|
net->batch = b;
|
||||||
@ -425,10 +369,6 @@ void set_batch_network(network *net, int b)
|
|||||||
detection_layer *layer = (detection_layer *) net->layers[i];
|
detection_layer *layer = (detection_layer *) net->layers[i];
|
||||||
layer->batch = b;
|
layer->batch = b;
|
||||||
}
|
}
|
||||||
else if(net->types[i] == FREEWEIGHT){
|
|
||||||
freeweight_layer *layer = (freeweight_layer *) net->layers[i];
|
|
||||||
layer->batch = b;
|
|
||||||
}
|
|
||||||
else if(net->types[i] == SOFTMAX){
|
else if(net->types[i] == SOFTMAX){
|
||||||
softmax_layer *layer = (softmax_layer *)net->layers[i];
|
softmax_layer *layer = (softmax_layer *)net->layers[i];
|
||||||
layer->batch = b;
|
layer->batch = b;
|
||||||
@ -472,15 +412,11 @@ int get_network_input_size_layer(network net, int i)
|
|||||||
crop_layer layer = *(crop_layer *) net.layers[i];
|
crop_layer layer = *(crop_layer *) net.layers[i];
|
||||||
return layer.c*layer.h*layer.w;
|
return layer.c*layer.h*layer.w;
|
||||||
}
|
}
|
||||||
else if(net.types[i] == FREEWEIGHT){
|
|
||||||
freeweight_layer layer = *(freeweight_layer *) net.layers[i];
|
|
||||||
return layer.inputs;
|
|
||||||
}
|
|
||||||
else if(net.types[i] == SOFTMAX){
|
else if(net.types[i] == SOFTMAX){
|
||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
||||||
return layer.inputs;
|
return layer.inputs;
|
||||||
}
|
}
|
||||||
printf("Can't find input size\n");
|
fprintf(stderr, "Can't find input size\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -505,7 +441,7 @@ int get_network_output_size_layer(network net, int i)
|
|||||||
image output = get_maxpool_image(layer);
|
image output = get_maxpool_image(layer);
|
||||||
return output.h*output.w*output.c;
|
return output.h*output.w*output.c;
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CROP){
|
else if(net.types[i] == CROP){
|
||||||
crop_layer layer = *(crop_layer *) net.layers[i];
|
crop_layer layer = *(crop_layer *) net.layers[i];
|
||||||
return layer.c*layer.crop_height*layer.crop_width;
|
return layer.c*layer.crop_height*layer.crop_width;
|
||||||
}
|
}
|
||||||
@ -517,15 +453,11 @@ int get_network_output_size_layer(network net, int i)
|
|||||||
dropout_layer layer = *(dropout_layer *) net.layers[i];
|
dropout_layer layer = *(dropout_layer *) net.layers[i];
|
||||||
return layer.inputs;
|
return layer.inputs;
|
||||||
}
|
}
|
||||||
else if(net.types[i] == FREEWEIGHT){
|
|
||||||
freeweight_layer layer = *(freeweight_layer *) net.layers[i];
|
|
||||||
return layer.inputs;
|
|
||||||
}
|
|
||||||
else if(net.types[i] == SOFTMAX){
|
else if(net.types[i] == SOFTMAX){
|
||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
||||||
return layer.inputs;
|
return layer.inputs;
|
||||||
}
|
}
|
||||||
printf("Can't find output size\n");
|
fprintf(stderr, "Can't find output size\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -650,11 +582,16 @@ void top_predictions(network net, int k, int *index)
|
|||||||
|
|
||||||
float *network_predict(network net, float *input)
|
float *network_predict(network net, float *input)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0) return network_predict_gpu(net, input);
|
if(gpu_index >= 0) return network_predict_gpu(net, input);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
forward_network(net, input, 0, 0);
|
network_state state;
|
||||||
|
state.input = input;
|
||||||
|
state.truth = 0;
|
||||||
|
state.train = 0;
|
||||||
|
state.delta = 0;
|
||||||
|
forward_network(net, state);
|
||||||
float *out = get_network_output(net);
|
float *out = get_network_output(net);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#define NETWORK_H
|
#define NETWORK_H
|
||||||
|
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
#include "params.h"
|
||||||
#include "data.h"
|
#include "data.h"
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
@ -14,7 +15,6 @@ typedef enum {
|
|||||||
DETECTION,
|
DETECTION,
|
||||||
NORMALIZATION,
|
NORMALIZATION,
|
||||||
DROPOUT,
|
DROPOUT,
|
||||||
FREEWEIGHT,
|
|
||||||
CROP,
|
CROP,
|
||||||
COST
|
COST
|
||||||
} LAYER_TYPE;
|
} LAYER_TYPE;
|
||||||
@ -31,6 +31,9 @@ typedef struct {
|
|||||||
int outputs;
|
int outputs;
|
||||||
float *output;
|
float *output;
|
||||||
|
|
||||||
|
int inputs;
|
||||||
|
int h, w, c;
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
float **input_gpu;
|
float **input_gpu;
|
||||||
float **truth_gpu;
|
float **truth_gpu;
|
||||||
@ -47,9 +50,9 @@ float * get_network_delta_gpu_layer(network net, int i);
|
|||||||
void compare_networks(network n1, network n2, data d);
|
void compare_networks(network n1, network n2, data d);
|
||||||
char *get_layer_string(LAYER_TYPE a);
|
char *get_layer_string(LAYER_TYPE a);
|
||||||
|
|
||||||
network make_network(int n, int batch);
|
network make_network(int n);
|
||||||
void forward_network(network net, float *input, float *truth, int train);
|
void forward_network(network net, network_state state);
|
||||||
void backward_network(network net, float *input, float *truth);
|
void backward_network(network net, network_state state);
|
||||||
void update_network(network net);
|
void update_network(network net);
|
||||||
|
|
||||||
float train_network(network net, data d);
|
float train_network(network net, data d);
|
||||||
@ -75,7 +78,6 @@ void print_network(network net);
|
|||||||
void visualize_network(network net);
|
void visualize_network(network net);
|
||||||
int resize_network(network net, int h, int w, int c);
|
int resize_network(network net, int h, int w, int c);
|
||||||
void set_batch_network(network *net, int b);
|
void set_batch_network(network *net, int b);
|
||||||
void set_learning_network(network *net, float rate, float momentum, float decay);
|
|
||||||
int get_network_input_size(network net);
|
int get_network_input_size(network net);
|
||||||
float get_network_cost(network net);
|
float get_network_cost(network net);
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ extern "C" {
|
|||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "data.h"
|
#include "data.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
#include "crop_layer.h"
|
#include "crop_layer.h"
|
||||||
#include "connected_layer.h"
|
#include "connected_layer.h"
|
||||||
@ -15,7 +16,6 @@ extern "C" {
|
|||||||
#include "maxpool_layer.h"
|
#include "maxpool_layer.h"
|
||||||
#include "cost_layer.h"
|
#include "cost_layer.h"
|
||||||
#include "normalization_layer.h"
|
#include "normalization_layer.h"
|
||||||
#include "freeweight_layer.h"
|
|
||||||
#include "softmax_layer.h"
|
#include "softmax_layer.h"
|
||||||
#include "dropout_layer.h"
|
#include "dropout_layer.h"
|
||||||
}
|
}
|
||||||
@ -24,108 +24,78 @@ extern "C" float * get_network_output_gpu_layer(network net, int i);
|
|||||||
extern "C" float * get_network_delta_gpu_layer(network net, int i);
|
extern "C" float * get_network_delta_gpu_layer(network net, int i);
|
||||||
float *get_network_output_gpu(network net);
|
float *get_network_output_gpu(network net);
|
||||||
|
|
||||||
void forward_network_gpu(network net, float * input, float * truth, int train)
|
void forward_network_gpu(network net, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < net.n; ++i){
|
for(i = 0; i < net.n; ++i){
|
||||||
//clock_t time = clock();
|
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
forward_convolutional_layer_gpu(*(convolutional_layer *)net.layers[i], state);
|
||||||
forward_convolutional_layer_gpu(layer, input);
|
|
||||||
input = layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DECONVOLUTIONAL){
|
else if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
forward_deconvolutional_layer_gpu(*(deconvolutional_layer *)net.layers[i], state);
|
||||||
forward_deconvolutional_layer_gpu(layer, input);
|
|
||||||
input = layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == COST){
|
else if(net.types[i] == COST){
|
||||||
cost_layer layer = *(cost_layer *)net.layers[i];
|
forward_cost_layer_gpu(*(cost_layer *)net.layers[i], state);
|
||||||
forward_cost_layer_gpu(layer, input, truth);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CONNECTED){
|
else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
forward_connected_layer_gpu(*(connected_layer *)net.layers[i], state);
|
||||||
forward_connected_layer_gpu(layer, input);
|
|
||||||
input = layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DETECTION){
|
else if(net.types[i] == DETECTION){
|
||||||
detection_layer layer = *(detection_layer *)net.layers[i];
|
forward_detection_layer_gpu(*(detection_layer *)net.layers[i], state);
|
||||||
forward_detection_layer_gpu(layer, input, truth);
|
|
||||||
input = layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == MAXPOOL){
|
else if(net.types[i] == MAXPOOL){
|
||||||
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
|
forward_maxpool_layer_gpu(*(maxpool_layer *)net.layers[i], state);
|
||||||
forward_maxpool_layer_gpu(layer, input);
|
|
||||||
input = layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == SOFTMAX){
|
else if(net.types[i] == SOFTMAX){
|
||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
forward_softmax_layer_gpu(*(softmax_layer *)net.layers[i], state);
|
||||||
forward_softmax_layer_gpu(layer, input);
|
|
||||||
input = layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DROPOUT){
|
else if(net.types[i] == DROPOUT){
|
||||||
if(!train) continue;
|
forward_dropout_layer_gpu(*(dropout_layer *)net.layers[i], state);
|
||||||
dropout_layer layer = *(dropout_layer *)net.layers[i];
|
|
||||||
forward_dropout_layer_gpu(layer, input);
|
|
||||||
input = layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CROP){
|
else if(net.types[i] == CROP){
|
||||||
crop_layer layer = *(crop_layer *)net.layers[i];
|
forward_crop_layer_gpu(*(crop_layer *)net.layers[i], state);
|
||||||
forward_crop_layer_gpu(layer, train, input);
|
|
||||||
input = layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
//cudaDeviceSynchronize();
|
state.input = get_network_output_gpu_layer(net, i);
|
||||||
//printf("Forward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_network_gpu(network net, float * input, float *truth)
|
void backward_network_gpu(network net, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
float * prev_input;
|
float * original_input = state.input;
|
||||||
float * prev_delta;
|
|
||||||
for(i = net.n-1; i >= 0; --i){
|
for(i = net.n-1; i >= 0; --i){
|
||||||
//clock_t time = clock();
|
//clock_t time = clock();
|
||||||
if(i == 0){
|
if(i == 0){
|
||||||
prev_input = input;
|
state.input = original_input;
|
||||||
prev_delta = 0;
|
state.delta = 0;
|
||||||
}else{
|
}else{
|
||||||
prev_input = get_network_output_gpu_layer(net, i-1);
|
state.input = get_network_output_gpu_layer(net, i-1);
|
||||||
prev_delta = get_network_delta_gpu_layer(net, i-1);
|
state.delta = get_network_delta_gpu_layer(net, i-1);
|
||||||
}
|
}
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
backward_convolutional_layer_gpu(*(convolutional_layer *)net.layers[i], state);
|
||||||
backward_convolutional_layer_gpu(layer, prev_input, prev_delta);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DECONVOLUTIONAL){
|
else if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
backward_deconvolutional_layer_gpu(*(deconvolutional_layer *)net.layers[i], state);
|
||||||
backward_deconvolutional_layer_gpu(layer, prev_input, prev_delta);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == COST){
|
else if(net.types[i] == COST){
|
||||||
cost_layer layer = *(cost_layer *)net.layers[i];
|
backward_cost_layer_gpu(*(cost_layer *)net.layers[i], state);
|
||||||
backward_cost_layer_gpu(layer, prev_input, prev_delta);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CONNECTED){
|
else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
backward_connected_layer_gpu(*(connected_layer *)net.layers[i], state);
|
||||||
backward_connected_layer_gpu(layer, prev_input, prev_delta);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DETECTION){
|
else if(net.types[i] == DETECTION){
|
||||||
detection_layer layer = *(detection_layer *)net.layers[i];
|
backward_detection_layer_gpu(*(detection_layer *)net.layers[i], state);
|
||||||
backward_detection_layer_gpu(layer, prev_input, prev_delta);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == MAXPOOL){
|
else if(net.types[i] == MAXPOOL){
|
||||||
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
|
backward_maxpool_layer_gpu(*(maxpool_layer *)net.layers[i], state);
|
||||||
backward_maxpool_layer_gpu(layer, prev_delta);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DROPOUT){
|
else if(net.types[i] == DROPOUT){
|
||||||
dropout_layer layer = *(dropout_layer *)net.layers[i];
|
backward_dropout_layer_gpu(*(dropout_layer *)net.layers[i], state);
|
||||||
backward_dropout_layer_gpu(layer, prev_delta);
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == SOFTMAX){
|
else if(net.types[i] == SOFTMAX){
|
||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
backward_softmax_layer_gpu(*(softmax_layer *)net.layers[i], state);
|
||||||
backward_softmax_layer_gpu(layer, prev_delta);
|
|
||||||
}
|
}
|
||||||
//printf("Backward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,15 +105,15 @@ void update_network_gpu(network net)
|
|||||||
for(i = 0; i < net.n; ++i){
|
for(i = 0; i < net.n; ++i){
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
||||||
update_convolutional_layer_gpu(layer);
|
update_convolutional_layer_gpu(layer, net.learning_rate, net.momentum, net.decay);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DECONVOLUTIONAL){
|
else if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
||||||
update_deconvolutional_layer_gpu(layer);
|
update_deconvolutional_layer_gpu(layer, net.learning_rate, net.momentum, net.decay);
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CONNECTED){
|
else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
connected_layer layer = *(connected_layer *)net.layers[i];
|
||||||
update_connected_layer_gpu(layer);
|
update_connected_layer_gpu(layer, net.learning_rate, net.momentum, net.decay);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -151,35 +121,28 @@ void update_network_gpu(network net)
|
|||||||
float * get_network_output_gpu_layer(network net, int i)
|
float * get_network_output_gpu_layer(network net, int i)
|
||||||
{
|
{
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
return ((convolutional_layer *)net.layers[i]) -> output_gpu;
|
||||||
return layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DECONVOLUTIONAL){
|
else if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i];
|
return ((deconvolutional_layer *)net.layers[i]) -> output_gpu;
|
||||||
return layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == DETECTION){
|
else if(net.types[i] == DETECTION){
|
||||||
detection_layer layer = *(detection_layer *)net.layers[i];
|
return ((detection_layer *)net.layers[i]) -> output_gpu;
|
||||||
return layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CONNECTED){
|
else if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *)net.layers[i];
|
return ((connected_layer *)net.layers[i]) -> output_gpu;
|
||||||
return layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == MAXPOOL){
|
else if(net.types[i] == MAXPOOL){
|
||||||
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
|
return ((maxpool_layer *)net.layers[i]) -> output_gpu;
|
||||||
return layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == CROP){
|
else if(net.types[i] == CROP){
|
||||||
crop_layer layer = *(crop_layer *)net.layers[i];
|
return ((crop_layer *)net.layers[i]) -> output_gpu;
|
||||||
return layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
else if(net.types[i] == SOFTMAX){
|
else if(net.types[i] == SOFTMAX){
|
||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
return ((softmax_layer *)net.layers[i]) -> output_gpu;
|
||||||
return layer.output_gpu;
|
}
|
||||||
} else if(net.types[i] == DROPOUT){
|
else if(net.types[i] == DROPOUT){
|
||||||
dropout_layer layer = *(dropout_layer *)net.layers[i];
|
return get_network_output_gpu_layer(net, i-1);
|
||||||
return layer.output_gpu;
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -219,6 +182,7 @@ float * get_network_delta_gpu_layer(network net, int i)
|
|||||||
float train_network_datum_gpu(network net, float *x, float *y)
|
float train_network_datum_gpu(network net, float *x, float *y)
|
||||||
{
|
{
|
||||||
//clock_t time = clock();
|
//clock_t time = clock();
|
||||||
|
network_state state;
|
||||||
int x_size = get_network_input_size(net)*net.batch;
|
int x_size = get_network_input_size(net)*net.batch;
|
||||||
int y_size = get_network_output_size(net)*net.batch;
|
int y_size = get_network_output_size(net)*net.batch;
|
||||||
if(!*net.input_gpu){
|
if(!*net.input_gpu){
|
||||||
@ -228,12 +192,15 @@ float train_network_datum_gpu(network net, float *x, float *y)
|
|||||||
cuda_push_array(*net.input_gpu, x, x_size);
|
cuda_push_array(*net.input_gpu, x, x_size);
|
||||||
cuda_push_array(*net.truth_gpu, y, y_size);
|
cuda_push_array(*net.truth_gpu, y, y_size);
|
||||||
}
|
}
|
||||||
|
state.input = *net.input_gpu;
|
||||||
|
state.truth = *net.truth_gpu;
|
||||||
|
state.train = 1;
|
||||||
//printf("trans %f\n", sec(clock() - time));
|
//printf("trans %f\n", sec(clock() - time));
|
||||||
//time = clock();
|
//time = clock();
|
||||||
forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1);
|
forward_network_gpu(net, state);
|
||||||
//printf("forw %f\n", sec(clock() - time));
|
//printf("forw %f\n", sec(clock() - time));
|
||||||
//time = clock();
|
//time = clock();
|
||||||
backward_network_gpu(net, *net.input_gpu, *net.truth_gpu);
|
backward_network_gpu(net, state);
|
||||||
//printf("back %f\n", sec(clock() - time));
|
//printf("back %f\n", sec(clock() - time));
|
||||||
//time = clock();
|
//time = clock();
|
||||||
update_network_gpu(net);
|
update_network_gpu(net);
|
||||||
@ -291,10 +258,14 @@ float *network_predict_gpu(network net, float *input)
|
|||||||
{
|
{
|
||||||
|
|
||||||
int size = get_network_input_size(net) * net.batch;
|
int size = get_network_input_size(net) * net.batch;
|
||||||
float * input_gpu = cuda_make_array(input, size);
|
network_state state;
|
||||||
forward_network_gpu(net, input_gpu, 0, 0);
|
state.input = cuda_make_array(input, size);
|
||||||
|
state.truth = 0;
|
||||||
|
state.train = 0;
|
||||||
|
state.delta = 0;
|
||||||
|
forward_network_gpu(net, state);
|
||||||
float *out = get_network_output_gpu(net);
|
float *out = get_network_output_gpu(net);
|
||||||
cuda_free(input_gpu);
|
cuda_free(state.input);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,28 +59,29 @@ void sub_square_array(float *src, float *dest, int n)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_normalization_layer(const normalization_layer layer, float *in)
|
void forward_normalization_layer(const normalization_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i,j,k;
|
int i,j,k;
|
||||||
memset(layer.sums, 0, layer.h*layer.w*sizeof(float));
|
memset(layer.sums, 0, layer.h*layer.w*sizeof(float));
|
||||||
int imsize = layer.h*layer.w;
|
int imsize = layer.h*layer.w;
|
||||||
for(j = 0; j < layer.size/2; ++j){
|
for(j = 0; j < layer.size/2; ++j){
|
||||||
if(j < layer.c) add_square_array(in+j*imsize, layer.sums, imsize);
|
if(j < layer.c) add_square_array(state.input+j*imsize, layer.sums, imsize);
|
||||||
}
|
}
|
||||||
for(k = 0; k < layer.c; ++k){
|
for(k = 0; k < layer.c; ++k){
|
||||||
int next = k+layer.size/2;
|
int next = k+layer.size/2;
|
||||||
int prev = k-layer.size/2-1;
|
int prev = k-layer.size/2-1;
|
||||||
if(next < layer.c) add_square_array(in+next*imsize, layer.sums, imsize);
|
if(next < layer.c) add_square_array(state.input+next*imsize, layer.sums, imsize);
|
||||||
if(prev > 0) sub_square_array(in+prev*imsize, layer.sums, imsize);
|
if(prev > 0) sub_square_array(state.input+prev*imsize, layer.sums, imsize);
|
||||||
for(i = 0; i < imsize; ++i){
|
for(i = 0; i < imsize; ++i){
|
||||||
layer.output[k*imsize + i] = in[k*imsize+i] / pow(layer.kappa + layer.alpha * layer.sums[i], layer.beta);
|
layer.output[k*imsize + i] = state.input[k*imsize+i] / pow(layer.kappa + layer.alpha * layer.sums[i], layer.beta);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_normalization_layer(const normalization_layer layer, float *in, float *delta)
|
void backward_normalization_layer(const normalization_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
//TODO!
|
// TODO!
|
||||||
|
// OR NOT TODO!!
|
||||||
}
|
}
|
||||||
|
|
||||||
void visualize_normalization_layer(normalization_layer layer, char *window)
|
void visualize_normalization_layer(normalization_layer layer, char *window)
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
#define NORMALIZATION_LAYER_H
|
#define NORMALIZATION_LAYER_H
|
||||||
|
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int batch;
|
int batch;
|
||||||
@ -18,8 +19,8 @@ typedef struct {
|
|||||||
image get_normalization_image(normalization_layer layer);
|
image get_normalization_image(normalization_layer layer);
|
||||||
normalization_layer *make_normalization_layer(int batch, int h, int w, int c, int size, float alpha, float beta, float kappa);
|
normalization_layer *make_normalization_layer(int batch, int h, int w, int c, int size, float alpha, float beta, float kappa);
|
||||||
void resize_normalization_layer(normalization_layer *layer, int h, int w);
|
void resize_normalization_layer(normalization_layer *layer, int h, int w);
|
||||||
void forward_normalization_layer(const normalization_layer layer, float *in);
|
void forward_normalization_layer(const normalization_layer layer, network_state state);
|
||||||
void backward_normalization_layer(const normalization_layer layer, float *in, float *delta);
|
void backward_normalization_layer(const normalization_layer layer, network_state state);
|
||||||
void visualize_normalization_layer(normalization_layer layer, char *window);
|
void visualize_normalization_layer(normalization_layer layer, char *window);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
12
src/params.h
Normal file
12
src/params.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#ifndef PARAMS_H
|
||||||
|
#define PARAMS_H
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
float *truth;
|
||||||
|
float *input;
|
||||||
|
float *delta;
|
||||||
|
int train;
|
||||||
|
} network_state;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
474
src/parser.c
474
src/parser.c
@ -14,7 +14,6 @@
|
|||||||
#include "softmax_layer.h"
|
#include "softmax_layer.h"
|
||||||
#include "dropout_layer.h"
|
#include "dropout_layer.h"
|
||||||
#include "detection_layer.h"
|
#include "detection_layer.h"
|
||||||
#include "freeweight_layer.h"
|
|
||||||
#include "list.h"
|
#include "list.h"
|
||||||
#include "option_list.h"
|
#include "option_list.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
@ -24,12 +23,12 @@ typedef struct{
|
|||||||
list *options;
|
list *options;
|
||||||
}section;
|
}section;
|
||||||
|
|
||||||
|
int is_network(section *s);
|
||||||
int is_convolutional(section *s);
|
int is_convolutional(section *s);
|
||||||
int is_deconvolutional(section *s);
|
int is_deconvolutional(section *s);
|
||||||
int is_connected(section *s);
|
int is_connected(section *s);
|
||||||
int is_maxpool(section *s);
|
int is_maxpool(section *s);
|
||||||
int is_dropout(section *s);
|
int is_dropout(section *s);
|
||||||
int is_freeweight(section *s);
|
|
||||||
int is_softmax(section *s);
|
int is_softmax(section *s);
|
||||||
int is_crop(section *s);
|
int is_crop(section *s);
|
||||||
int is_cost(section *s);
|
int is_cost(section *s);
|
||||||
@ -69,38 +68,31 @@ void parse_data(char *data, float *a, int n)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
deconvolutional_layer *parse_deconvolutional(list *options, network *net, int count)
|
typedef struct size_params{
|
||||||
|
int batch;
|
||||||
|
int inputs;
|
||||||
|
int h;
|
||||||
|
int w;
|
||||||
|
int c;
|
||||||
|
} size_params;
|
||||||
|
|
||||||
|
deconvolutional_layer *parse_deconvolutional(list *options, size_params params)
|
||||||
{
|
{
|
||||||
int h,w,c;
|
|
||||||
float learning_rate, momentum, decay;
|
|
||||||
int n = option_find_int(options, "filters",1);
|
int n = option_find_int(options, "filters",1);
|
||||||
int size = option_find_int(options, "size",1);
|
int size = option_find_int(options, "size",1);
|
||||||
int stride = option_find_int(options, "stride",1);
|
int stride = option_find_int(options, "stride",1);
|
||||||
char *activation_s = option_find_str(options, "activation", "logistic");
|
char *activation_s = option_find_str(options, "activation", "logistic");
|
||||||
ACTIVATION activation = get_activation(activation_s);
|
ACTIVATION activation = get_activation(activation_s);
|
||||||
if(count == 0){
|
|
||||||
learning_rate = option_find_float(options, "learning_rate", .001);
|
int batch,h,w,c;
|
||||||
momentum = option_find_float(options, "momentum", .9);
|
h = params.h;
|
||||||
decay = option_find_float(options, "decay", .0001);
|
w = params.w;
|
||||||
h = option_find_int(options, "height",1);
|
c = params.c;
|
||||||
w = option_find_int(options, "width",1);
|
batch=params.batch;
|
||||||
c = option_find_int(options, "channels",1);
|
if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
|
||||||
net->batch = option_find_int(options, "batch",1);
|
|
||||||
net->learning_rate = learning_rate;
|
deconvolutional_layer *layer = make_deconvolutional_layer(batch,h,w,c,n,size,stride,activation);
|
||||||
net->momentum = momentum;
|
|
||||||
net->decay = decay;
|
|
||||||
net->seen = option_find_int(options, "seen",0);
|
|
||||||
}else{
|
|
||||||
learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate);
|
|
||||||
momentum = option_find_float_quiet(options, "momentum", net->momentum);
|
|
||||||
decay = option_find_float_quiet(options, "decay", net->decay);
|
|
||||||
image m = get_network_image_layer(*net, count-1);
|
|
||||||
h = m.h;
|
|
||||||
w = m.w;
|
|
||||||
c = m.c;
|
|
||||||
if(h == 0) error("Layer before deconvolutional layer must output image.");
|
|
||||||
}
|
|
||||||
deconvolutional_layer *layer = make_deconvolutional_layer(net->batch,h,w,c,n,size,stride,activation,learning_rate,momentum,decay);
|
|
||||||
char *weights = option_find_str(options, "weights", 0);
|
char *weights = option_find_str(options, "weights", 0);
|
||||||
char *biases = option_find_str(options, "biases", 0);
|
char *biases = option_find_str(options, "biases", 0);
|
||||||
parse_data(weights, layer->filters, c*n*size*size);
|
parse_data(weights, layer->filters, c*n*size*size);
|
||||||
@ -112,39 +104,24 @@ deconvolutional_layer *parse_deconvolutional(list *options, network *net, int co
|
|||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
convolutional_layer *parse_convolutional(list *options, network *net, int count)
|
convolutional_layer *parse_convolutional(list *options, size_params params)
|
||||||
{
|
{
|
||||||
int h,w,c;
|
|
||||||
float learning_rate, momentum, decay;
|
|
||||||
int n = option_find_int(options, "filters",1);
|
int n = option_find_int(options, "filters",1);
|
||||||
int size = option_find_int(options, "size",1);
|
int size = option_find_int(options, "size",1);
|
||||||
int stride = option_find_int(options, "stride",1);
|
int stride = option_find_int(options, "stride",1);
|
||||||
int pad = option_find_int(options, "pad",0);
|
int pad = option_find_int(options, "pad",0);
|
||||||
char *activation_s = option_find_str(options, "activation", "logistic");
|
char *activation_s = option_find_str(options, "activation", "logistic");
|
||||||
ACTIVATION activation = get_activation(activation_s);
|
ACTIVATION activation = get_activation(activation_s);
|
||||||
if(count == 0){
|
|
||||||
learning_rate = option_find_float(options, "learning_rate", .001);
|
int batch,h,w,c;
|
||||||
momentum = option_find_float(options, "momentum", .9);
|
h = params.h;
|
||||||
decay = option_find_float(options, "decay", .0001);
|
w = params.w;
|
||||||
h = option_find_int(options, "height",1);
|
c = params.c;
|
||||||
w = option_find_int(options, "width",1);
|
batch=params.batch;
|
||||||
c = option_find_int(options, "channels",1);
|
if(!(h && w && c)) error("Layer before convolutional layer must output image.");
|
||||||
net->batch = option_find_int(options, "batch",1);
|
|
||||||
net->learning_rate = learning_rate;
|
convolutional_layer *layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation);
|
||||||
net->momentum = momentum;
|
|
||||||
net->decay = decay;
|
|
||||||
net->seen = option_find_int(options, "seen",0);
|
|
||||||
}else{
|
|
||||||
learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate);
|
|
||||||
momentum = option_find_float_quiet(options, "momentum", net->momentum);
|
|
||||||
decay = option_find_float_quiet(options, "decay", net->decay);
|
|
||||||
image m = get_network_image_layer(*net, count-1);
|
|
||||||
h = m.h;
|
|
||||||
w = m.w;
|
|
||||||
c = m.c;
|
|
||||||
if(h == 0) error("Layer before convolutional layer must output image.");
|
|
||||||
}
|
|
||||||
convolutional_layer *layer = make_convolutional_layer(net->batch,h,w,c,n,size,stride,pad,activation,learning_rate,momentum,decay);
|
|
||||||
char *weights = option_find_str(options, "weights", 0);
|
char *weights = option_find_str(options, "weights", 0);
|
||||||
char *biases = option_find_str(options, "biases", 0);
|
char *biases = option_find_str(options, "biases", 0);
|
||||||
parse_data(weights, layer->filters, c*n*size*size);
|
parse_data(weights, layer->filters, c*n*size*size);
|
||||||
@ -156,33 +133,18 @@ convolutional_layer *parse_convolutional(list *options, network *net, int count)
|
|||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
connected_layer *parse_connected(list *options, network *net, int count)
|
connected_layer *parse_connected(list *options, size_params params)
|
||||||
{
|
{
|
||||||
int input;
|
|
||||||
float learning_rate, momentum, decay;
|
|
||||||
int output = option_find_int(options, "output",1);
|
int output = option_find_int(options, "output",1);
|
||||||
char *activation_s = option_find_str(options, "activation", "logistic");
|
char *activation_s = option_find_str(options, "activation", "logistic");
|
||||||
ACTIVATION activation = get_activation(activation_s);
|
ACTIVATION activation = get_activation(activation_s);
|
||||||
if(count == 0){
|
|
||||||
input = option_find_int(options, "input",1);
|
connected_layer *layer = make_connected_layer(params.batch, params.inputs, output, activation);
|
||||||
net->batch = option_find_int(options, "batch",1);
|
|
||||||
learning_rate = option_find_float(options, "learning_rate", .001);
|
|
||||||
momentum = option_find_float(options, "momentum", .9);
|
|
||||||
decay = option_find_float(options, "decay", .0001);
|
|
||||||
net->learning_rate = learning_rate;
|
|
||||||
net->momentum = momentum;
|
|
||||||
net->decay = decay;
|
|
||||||
}else{
|
|
||||||
learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate);
|
|
||||||
momentum = option_find_float_quiet(options, "momentum", net->momentum);
|
|
||||||
decay = option_find_float_quiet(options, "decay", net->decay);
|
|
||||||
input = get_network_output_size_layer(*net, count-1);
|
|
||||||
}
|
|
||||||
connected_layer *layer = make_connected_layer(net->batch, input, output, activation,learning_rate,momentum,decay);
|
|
||||||
char *weights = option_find_str(options, "weights", 0);
|
char *weights = option_find_str(options, "weights", 0);
|
||||||
char *biases = option_find_str(options, "biases", 0);
|
char *biases = option_find_str(options, "biases", 0);
|
||||||
parse_data(biases, layer->biases, output);
|
parse_data(biases, layer->biases, output);
|
||||||
parse_data(weights, layer->weights, input*output);
|
parse_data(weights, layer->weights, params.inputs*output);
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(weights || biases) push_connected_layer(*layer);
|
if(weights || biases) push_connected_layer(*layer);
|
||||||
#endif
|
#endif
|
||||||
@ -190,235 +152,188 @@ connected_layer *parse_connected(list *options, network *net, int count)
|
|||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
softmax_layer *parse_softmax(list *options, network *net, int count)
|
softmax_layer *parse_softmax(list *options, size_params params)
|
||||||
{
|
{
|
||||||
int input;
|
|
||||||
int groups = option_find_int(options, "groups",1);
|
int groups = option_find_int(options, "groups",1);
|
||||||
if(count == 0){
|
softmax_layer *layer = make_softmax_layer(params.batch, params.inputs, groups);
|
||||||
input = option_find_int(options, "input",1);
|
|
||||||
net->batch = option_find_int(options, "batch",1);
|
|
||||||
net->seen = option_find_int(options, "seen",0);
|
|
||||||
}else{
|
|
||||||
input = get_network_output_size_layer(*net, count-1);
|
|
||||||
}
|
|
||||||
softmax_layer *layer = make_softmax_layer(net->batch, groups, input);
|
|
||||||
option_unused(options);
|
option_unused(options);
|
||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
detection_layer *parse_detection(list *options, network *net, int count)
|
detection_layer *parse_detection(list *options, size_params params)
|
||||||
{
|
{
|
||||||
int input;
|
|
||||||
if(count == 0){
|
|
||||||
input = option_find_int(options, "input",1);
|
|
||||||
net->batch = option_find_int(options, "batch",1);
|
|
||||||
net->seen = option_find_int(options, "seen",0);
|
|
||||||
}else{
|
|
||||||
input = get_network_output_size_layer(*net, count-1);
|
|
||||||
}
|
|
||||||
int coords = option_find_int(options, "coords", 1);
|
int coords = option_find_int(options, "coords", 1);
|
||||||
int classes = option_find_int(options, "classes", 1);
|
int classes = option_find_int(options, "classes", 1);
|
||||||
int rescore = option_find_int(options, "rescore", 1);
|
int rescore = option_find_int(options, "rescore", 1);
|
||||||
detection_layer *layer = make_detection_layer(net->batch, input, classes, coords, rescore);
|
detection_layer *layer = make_detection_layer(params.batch, params.inputs, classes, coords, rescore);
|
||||||
option_unused(options);
|
option_unused(options);
|
||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
cost_layer *parse_cost(list *options, network *net, int count)
|
cost_layer *parse_cost(list *options, size_params params)
|
||||||
{
|
{
|
||||||
int input;
|
|
||||||
if(count == 0){
|
|
||||||
input = option_find_int(options, "input",1);
|
|
||||||
net->batch = option_find_int(options, "batch",1);
|
|
||||||
net->seen = option_find_int(options, "seen",0);
|
|
||||||
}else{
|
|
||||||
input = get_network_output_size_layer(*net, count-1);
|
|
||||||
}
|
|
||||||
char *type_s = option_find_str(options, "type", "sse");
|
char *type_s = option_find_str(options, "type", "sse");
|
||||||
COST_TYPE type = get_cost_type(type_s);
|
COST_TYPE type = get_cost_type(type_s);
|
||||||
cost_layer *layer = make_cost_layer(net->batch, input, type);
|
cost_layer *layer = make_cost_layer(params.batch, params.inputs, type);
|
||||||
option_unused(options);
|
option_unused(options);
|
||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
crop_layer *parse_crop(list *options, network *net, int count)
|
crop_layer *parse_crop(list *options, size_params params)
|
||||||
{
|
{
|
||||||
float learning_rate, momentum, decay;
|
|
||||||
int h,w,c;
|
|
||||||
int crop_height = option_find_int(options, "crop_height",1);
|
int crop_height = option_find_int(options, "crop_height",1);
|
||||||
int crop_width = option_find_int(options, "crop_width",1);
|
int crop_width = option_find_int(options, "crop_width",1);
|
||||||
int flip = option_find_int(options, "flip",0);
|
int flip = option_find_int(options, "flip",0);
|
||||||
if(count == 0){
|
|
||||||
h = option_find_int(options, "height",1);
|
int batch,h,w,c;
|
||||||
w = option_find_int(options, "width",1);
|
h = params.h;
|
||||||
c = option_find_int(options, "channels",1);
|
w = params.w;
|
||||||
net->batch = option_find_int(options, "batch",1);
|
c = params.c;
|
||||||
learning_rate = option_find_float(options, "learning_rate", .001);
|
batch=params.batch;
|
||||||
momentum = option_find_float(options, "momentum", .9);
|
if(!(h && w && c)) error("Layer before crop layer must output image.");
|
||||||
decay = option_find_float(options, "decay", .0001);
|
|
||||||
net->learning_rate = learning_rate;
|
crop_layer *layer = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip);
|
||||||
net->momentum = momentum;
|
|
||||||
net->decay = decay;
|
|
||||||
net->seen = option_find_int(options, "seen",0);
|
|
||||||
}else{
|
|
||||||
image m = get_network_image_layer(*net, count-1);
|
|
||||||
h = m.h;
|
|
||||||
w = m.w;
|
|
||||||
c = m.c;
|
|
||||||
if(h == 0) error("Layer before crop layer must output image.");
|
|
||||||
}
|
|
||||||
crop_layer *layer = make_crop_layer(net->batch,h,w,c,crop_height,crop_width,flip);
|
|
||||||
option_unused(options);
|
option_unused(options);
|
||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
maxpool_layer *parse_maxpool(list *options, network *net, int count)
|
maxpool_layer *parse_maxpool(list *options, size_params params)
|
||||||
{
|
{
|
||||||
int h,w,c;
|
|
||||||
int stride = option_find_int(options, "stride",1);
|
int stride = option_find_int(options, "stride",1);
|
||||||
int size = option_find_int(options, "size",stride);
|
int size = option_find_int(options, "size",stride);
|
||||||
if(count == 0){
|
|
||||||
h = option_find_int(options, "height",1);
|
int batch,h,w,c;
|
||||||
w = option_find_int(options, "width",1);
|
h = params.h;
|
||||||
c = option_find_int(options, "channels",1);
|
w = params.w;
|
||||||
net->batch = option_find_int(options, "batch",1);
|
c = params.c;
|
||||||
net->seen = option_find_int(options, "seen",0);
|
batch=params.batch;
|
||||||
}else{
|
if(!(h && w && c)) error("Layer before maxpool layer must output image.");
|
||||||
image m = get_network_image_layer(*net, count-1);
|
|
||||||
h = m.h;
|
maxpool_layer *layer = make_maxpool_layer(batch,h,w,c,size,stride);
|
||||||
w = m.w;
|
|
||||||
c = m.c;
|
|
||||||
if(h == 0) error("Layer before convolutional layer must output image.");
|
|
||||||
}
|
|
||||||
maxpool_layer *layer = make_maxpool_layer(net->batch,h,w,c,size,stride);
|
|
||||||
option_unused(options);
|
option_unused(options);
|
||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
dropout_layer *parse_dropout(list *options, size_params params)
|
||||||
freeweight_layer *parse_freeweight(list *options, network *net, int count)
|
|
||||||
{
|
{
|
||||||
int input;
|
|
||||||
if(count == 0){
|
|
||||||
net->batch = option_find_int(options, "batch",1);
|
|
||||||
input = option_find_int(options, "input",1);
|
|
||||||
}else{
|
|
||||||
input = get_network_output_size_layer(*net, count-1);
|
|
||||||
}
|
|
||||||
freeweight_layer *layer = make_freeweight_layer(net->batch,input);
|
|
||||||
option_unused(options);
|
|
||||||
return layer;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
dropout_layer *parse_dropout(list *options, network *net, int count)
|
|
||||||
{
|
|
||||||
int input;
|
|
||||||
float probability = option_find_float(options, "probability", .5);
|
float probability = option_find_float(options, "probability", .5);
|
||||||
if(count == 0){
|
dropout_layer *layer = make_dropout_layer(params.batch, params.inputs, probability);
|
||||||
net->batch = option_find_int(options, "batch",1);
|
|
||||||
input = option_find_int(options, "input",1);
|
|
||||||
float learning_rate = option_find_float(options, "learning_rate", .001);
|
|
||||||
float momentum = option_find_float(options, "momentum", .9);
|
|
||||||
float decay = option_find_float(options, "decay", .0001);
|
|
||||||
net->learning_rate = learning_rate;
|
|
||||||
net->momentum = momentum;
|
|
||||||
net->decay = decay;
|
|
||||||
net->seen = option_find_int(options, "seen",0);
|
|
||||||
}else{
|
|
||||||
input = get_network_output_size_layer(*net, count-1);
|
|
||||||
}
|
|
||||||
dropout_layer *layer = make_dropout_layer(net->batch,input,probability);
|
|
||||||
option_unused(options);
|
option_unused(options);
|
||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
normalization_layer *parse_normalization(list *options, network *net, int count)
|
normalization_layer *parse_normalization(list *options, size_params params)
|
||||||
{
|
{
|
||||||
int h,w,c;
|
|
||||||
int size = option_find_int(options, "size",1);
|
int size = option_find_int(options, "size",1);
|
||||||
float alpha = option_find_float(options, "alpha", 0.);
|
float alpha = option_find_float(options, "alpha", 0.);
|
||||||
float beta = option_find_float(options, "beta", 1.);
|
float beta = option_find_float(options, "beta", 1.);
|
||||||
float kappa = option_find_float(options, "kappa", 1.);
|
float kappa = option_find_float(options, "kappa", 1.);
|
||||||
if(count == 0){
|
|
||||||
h = option_find_int(options, "height",1);
|
int batch,h,w,c;
|
||||||
w = option_find_int(options, "width",1);
|
h = params.h;
|
||||||
c = option_find_int(options, "channels",1);
|
w = params.w;
|
||||||
net->batch = option_find_int(options, "batch",1);
|
c = params.c;
|
||||||
net->seen = option_find_int(options, "seen",0);
|
batch=params.batch;
|
||||||
}else{
|
if(!(h && w && c)) error("Layer before normalization layer must output image.");
|
||||||
image m = get_network_image_layer(*net, count-1);
|
|
||||||
h = m.h;
|
normalization_layer *layer = make_normalization_layer(batch,h,w,c,size, alpha, beta, kappa);
|
||||||
w = m.w;
|
|
||||||
c = m.c;
|
|
||||||
if(h == 0) error("Layer before convolutional layer must output image.");
|
|
||||||
}
|
|
||||||
normalization_layer *layer = make_normalization_layer(net->batch,h,w,c,size, alpha, beta, kappa);
|
|
||||||
option_unused(options);
|
option_unused(options);
|
||||||
return layer;
|
return layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void parse_net_options(list *options, network *net)
|
||||||
|
{
|
||||||
|
net->batch = option_find_int(options, "batch",1);
|
||||||
|
net->learning_rate = option_find_float(options, "learning_rate", .001);
|
||||||
|
net->momentum = option_find_float(options, "momentum", .9);
|
||||||
|
net->decay = option_find_float(options, "decay", .0001);
|
||||||
|
net->seen = option_find_int(options, "seen",0);
|
||||||
|
|
||||||
|
net->h = option_find_int_quiet(options, "height",0);
|
||||||
|
net->w = option_find_int_quiet(options, "width",0);
|
||||||
|
net->c = option_find_int_quiet(options, "channels",0);
|
||||||
|
net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
|
||||||
|
if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
|
||||||
|
}
|
||||||
|
|
||||||
network parse_network_cfg(char *filename)
|
network parse_network_cfg(char *filename)
|
||||||
{
|
{
|
||||||
list *sections = read_cfg(filename);
|
list *sections = read_cfg(filename);
|
||||||
network net = make_network(sections->size, 0);
|
|
||||||
|
|
||||||
node *n = sections->front;
|
node *n = sections->front;
|
||||||
|
if(!n) error("Config file has no sections");
|
||||||
|
network net = make_network(sections->size - 1);
|
||||||
|
size_params params;
|
||||||
|
|
||||||
|
section *s = (section *)n->val;
|
||||||
|
list *options = s->options;
|
||||||
|
if(!is_network(s)) error("First section must be [net] or [network]");
|
||||||
|
parse_net_options(options, &net);
|
||||||
|
|
||||||
|
params.h = net.h;
|
||||||
|
params.w = net.w;
|
||||||
|
params.c = net.c;
|
||||||
|
params.inputs = net.inputs;
|
||||||
|
params.batch = net.batch;
|
||||||
|
|
||||||
|
n = n->next;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while(n){
|
while(n){
|
||||||
section *s = (section *)n->val;
|
fprintf(stderr, "%d: ", count);
|
||||||
list *options = s->options;
|
s = (section *)n->val;
|
||||||
|
options = s->options;
|
||||||
if(is_convolutional(s)){
|
if(is_convolutional(s)){
|
||||||
convolutional_layer *layer = parse_convolutional(options, &net, count);
|
convolutional_layer *layer = parse_convolutional(options, params);
|
||||||
net.types[count] = CONVOLUTIONAL;
|
net.types[count] = CONVOLUTIONAL;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_deconvolutional(s)){
|
}else if(is_deconvolutional(s)){
|
||||||
deconvolutional_layer *layer = parse_deconvolutional(options, &net, count);
|
deconvolutional_layer *layer = parse_deconvolutional(options, params);
|
||||||
net.types[count] = DECONVOLUTIONAL;
|
net.types[count] = DECONVOLUTIONAL;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_connected(s)){
|
}else if(is_connected(s)){
|
||||||
connected_layer *layer = parse_connected(options, &net, count);
|
connected_layer *layer = parse_connected(options, params);
|
||||||
net.types[count] = CONNECTED;
|
net.types[count] = CONNECTED;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_crop(s)){
|
}else if(is_crop(s)){
|
||||||
crop_layer *layer = parse_crop(options, &net, count);
|
crop_layer *layer = parse_crop(options, params);
|
||||||
net.types[count] = CROP;
|
net.types[count] = CROP;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_cost(s)){
|
}else if(is_cost(s)){
|
||||||
cost_layer *layer = parse_cost(options, &net, count);
|
cost_layer *layer = parse_cost(options, params);
|
||||||
net.types[count] = COST;
|
net.types[count] = COST;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_detection(s)){
|
}else if(is_detection(s)){
|
||||||
detection_layer *layer = parse_detection(options, &net, count);
|
detection_layer *layer = parse_detection(options, params);
|
||||||
net.types[count] = DETECTION;
|
net.types[count] = DETECTION;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_softmax(s)){
|
}else if(is_softmax(s)){
|
||||||
softmax_layer *layer = parse_softmax(options, &net, count);
|
softmax_layer *layer = parse_softmax(options, params);
|
||||||
net.types[count] = SOFTMAX;
|
net.types[count] = SOFTMAX;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_maxpool(s)){
|
}else if(is_maxpool(s)){
|
||||||
maxpool_layer *layer = parse_maxpool(options, &net, count);
|
maxpool_layer *layer = parse_maxpool(options, params);
|
||||||
net.types[count] = MAXPOOL;
|
net.types[count] = MAXPOOL;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_normalization(s)){
|
}else if(is_normalization(s)){
|
||||||
normalization_layer *layer = parse_normalization(options, &net, count);
|
normalization_layer *layer = parse_normalization(options, params);
|
||||||
net.types[count] = NORMALIZATION;
|
net.types[count] = NORMALIZATION;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_dropout(s)){
|
}else if(is_dropout(s)){
|
||||||
dropout_layer *layer = parse_dropout(options, &net, count);
|
dropout_layer *layer = parse_dropout(options, params);
|
||||||
net.types[count] = DROPOUT;
|
net.types[count] = DROPOUT;
|
||||||
net.layers[count] = layer;
|
net.layers[count] = layer;
|
||||||
}else if(is_freeweight(s)){
|
|
||||||
//freeweight_layer *layer = parse_freeweight(options, &net, count);
|
|
||||||
//net.types[count] = FREEWEIGHT;
|
|
||||||
//net.layers[count] = layer;
|
|
||||||
fprintf(stderr, "Type not recognized: %s\n", s->type);
|
|
||||||
}else{
|
}else{
|
||||||
fprintf(stderr, "Type not recognized: %s\n", s->type);
|
fprintf(stderr, "Type not recognized: %s\n", s->type);
|
||||||
}
|
}
|
||||||
free_section(s);
|
free_section(s);
|
||||||
++count;
|
|
||||||
n = n->next;
|
n = n->next;
|
||||||
|
if(n){
|
||||||
|
image im = get_network_image_layer(net, count);
|
||||||
|
params.h = im.h;
|
||||||
|
params.w = im.w;
|
||||||
|
params.c = im.c;
|
||||||
|
params.inputs = get_network_output_size_layer(net, count);
|
||||||
|
}
|
||||||
|
++count;
|
||||||
}
|
}
|
||||||
free_list(sections);
|
free_list(sections);
|
||||||
net.outputs = get_network_output_size(net);
|
net.outputs = get_network_output_size(net);
|
||||||
@ -448,6 +363,11 @@ int is_convolutional(section *s)
|
|||||||
return (strcmp(s->type, "[conv]")==0
|
return (strcmp(s->type, "[conv]")==0
|
||||||
|| strcmp(s->type, "[convolutional]")==0);
|
|| strcmp(s->type, "[convolutional]")==0);
|
||||||
}
|
}
|
||||||
|
int is_network(section *s)
|
||||||
|
{
|
||||||
|
return (strcmp(s->type, "[net]")==0
|
||||||
|
|| strcmp(s->type, "[network]")==0);
|
||||||
|
}
|
||||||
int is_connected(section *s)
|
int is_connected(section *s)
|
||||||
{
|
{
|
||||||
return (strcmp(s->type, "[conn]")==0
|
return (strcmp(s->type, "[conn]")==0
|
||||||
@ -462,10 +382,6 @@ int is_dropout(section *s)
|
|||||||
{
|
{
|
||||||
return (strcmp(s->type, "[dropout]")==0);
|
return (strcmp(s->type, "[dropout]")==0);
|
||||||
}
|
}
|
||||||
int is_freeweight(section *s)
|
|
||||||
{
|
|
||||||
return (strcmp(s->type, "[freeweight]")==0);
|
|
||||||
}
|
|
||||||
|
|
||||||
int is_softmax(section *s)
|
int is_softmax(section *s)
|
||||||
{
|
{
|
||||||
@ -533,29 +449,11 @@ list *read_cfg(char *filename)
|
|||||||
|
|
||||||
void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int count)
|
void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0) pull_convolutional_layer(*l);
|
if(gpu_index >= 0) pull_convolutional_layer(*l);
|
||||||
#endif
|
#endif
|
||||||
int i;
|
int i;
|
||||||
fprintf(fp, "[convolutional]\n");
|
fprintf(fp, "[convolutional]\n");
|
||||||
if(count == 0) {
|
|
||||||
fprintf(fp, "batch=%d\n"
|
|
||||||
"height=%d\n"
|
|
||||||
"width=%d\n"
|
|
||||||
"channels=%d\n"
|
|
||||||
"learning_rate=%g\n"
|
|
||||||
"momentum=%g\n"
|
|
||||||
"decay=%g\n"
|
|
||||||
"seen=%d\n",
|
|
||||||
l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay, net.seen);
|
|
||||||
} else {
|
|
||||||
if(l->learning_rate != net.learning_rate)
|
|
||||||
fprintf(fp, "learning_rate=%g\n", l->learning_rate);
|
|
||||||
if(l->momentum != net.momentum)
|
|
||||||
fprintf(fp, "momentum=%g\n", l->momentum);
|
|
||||||
if(l->decay != net.decay)
|
|
||||||
fprintf(fp, "decay=%g\n", l->decay);
|
|
||||||
}
|
|
||||||
fprintf(fp, "filters=%d\n"
|
fprintf(fp, "filters=%d\n"
|
||||||
"size=%d\n"
|
"size=%d\n"
|
||||||
"stride=%d\n"
|
"stride=%d\n"
|
||||||
@ -573,29 +471,11 @@ void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int
|
|||||||
|
|
||||||
void print_deconvolutional_cfg(FILE *fp, deconvolutional_layer *l, network net, int count)
|
void print_deconvolutional_cfg(FILE *fp, deconvolutional_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0) pull_deconvolutional_layer(*l);
|
if(gpu_index >= 0) pull_deconvolutional_layer(*l);
|
||||||
#endif
|
#endif
|
||||||
int i;
|
int i;
|
||||||
fprintf(fp, "[deconvolutional]\n");
|
fprintf(fp, "[deconvolutional]\n");
|
||||||
if(count == 0) {
|
|
||||||
fprintf(fp, "batch=%d\n"
|
|
||||||
"height=%d\n"
|
|
||||||
"width=%d\n"
|
|
||||||
"channels=%d\n"
|
|
||||||
"learning_rate=%g\n"
|
|
||||||
"momentum=%g\n"
|
|
||||||
"decay=%g\n"
|
|
||||||
"seen=%d\n",
|
|
||||||
l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay, net.seen);
|
|
||||||
} else {
|
|
||||||
if(l->learning_rate != net.learning_rate)
|
|
||||||
fprintf(fp, "learning_rate=%g\n", l->learning_rate);
|
|
||||||
if(l->momentum != net.momentum)
|
|
||||||
fprintf(fp, "momentum=%g\n", l->momentum);
|
|
||||||
if(l->decay != net.decay)
|
|
||||||
fprintf(fp, "decay=%g\n", l->decay);
|
|
||||||
}
|
|
||||||
fprintf(fp, "filters=%d\n"
|
fprintf(fp, "filters=%d\n"
|
||||||
"size=%d\n"
|
"size=%d\n"
|
||||||
"stride=%d\n"
|
"stride=%d\n"
|
||||||
@ -610,47 +490,19 @@ void print_deconvolutional_cfg(FILE *fp, deconvolutional_layer *l, network net,
|
|||||||
fprintf(fp, "\n\n");
|
fprintf(fp, "\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_freeweight_cfg(FILE *fp, freeweight_layer *l, network net, int count)
|
|
||||||
{
|
|
||||||
fprintf(fp, "[freeweight]\n");
|
|
||||||
if(count == 0){
|
|
||||||
fprintf(fp, "batch=%d\ninput=%d\n",l->batch, l->inputs);
|
|
||||||
}
|
|
||||||
fprintf(fp, "\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
void print_dropout_cfg(FILE *fp, dropout_layer *l, network net, int count)
|
void print_dropout_cfg(FILE *fp, dropout_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
fprintf(fp, "[dropout]\n");
|
fprintf(fp, "[dropout]\n");
|
||||||
if(count == 0){
|
|
||||||
fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
|
|
||||||
}
|
|
||||||
fprintf(fp, "probability=%g\n\n", l->probability);
|
fprintf(fp, "probability=%g\n\n", l->probability);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count)
|
void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0) pull_connected_layer(*l);
|
if(gpu_index >= 0) pull_connected_layer(*l);
|
||||||
#endif
|
#endif
|
||||||
int i;
|
int i;
|
||||||
fprintf(fp, "[connected]\n");
|
fprintf(fp, "[connected]\n");
|
||||||
if(count == 0){
|
|
||||||
fprintf(fp, "batch=%d\n"
|
|
||||||
"input=%d\n"
|
|
||||||
"learning_rate=%g\n"
|
|
||||||
"momentum=%g\n"
|
|
||||||
"decay=%g\n"
|
|
||||||
"seen=%d\n",
|
|
||||||
l->batch, l->inputs, l->learning_rate, l->momentum, l->decay, net.seen);
|
|
||||||
} else {
|
|
||||||
if(l->learning_rate != net.learning_rate)
|
|
||||||
fprintf(fp, "learning_rate=%g\n", l->learning_rate);
|
|
||||||
if(l->momentum != net.momentum)
|
|
||||||
fprintf(fp, "momentum=%g\n", l->momentum);
|
|
||||||
if(l->decay != net.decay)
|
|
||||||
fprintf(fp, "decay=%g\n", l->decay);
|
|
||||||
}
|
|
||||||
fprintf(fp, "output=%d\n"
|
fprintf(fp, "output=%d\n"
|
||||||
"activation=%s\n",
|
"activation=%s\n",
|
||||||
l->outputs,
|
l->outputs,
|
||||||
@ -666,39 +518,18 @@ void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count)
|
|||||||
void print_crop_cfg(FILE *fp, crop_layer *l, network net, int count)
|
void print_crop_cfg(FILE *fp, crop_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
fprintf(fp, "[crop]\n");
|
fprintf(fp, "[crop]\n");
|
||||||
if(count == 0) {
|
|
||||||
fprintf(fp, "batch=%d\n"
|
|
||||||
"height=%d\n"
|
|
||||||
"width=%d\n"
|
|
||||||
"channels=%d\n"
|
|
||||||
"learning_rate=%g\n"
|
|
||||||
"momentum=%g\n"
|
|
||||||
"decay=%g\n"
|
|
||||||
"seen=%d\n",
|
|
||||||
l->batch,l->h, l->w, l->c, net.learning_rate, net.momentum, net.decay, net.seen);
|
|
||||||
}
|
|
||||||
fprintf(fp, "crop_height=%d\ncrop_width=%d\nflip=%d\n\n", l->crop_height, l->crop_width, l->flip);
|
fprintf(fp, "crop_height=%d\ncrop_width=%d\nflip=%d\n\n", l->crop_height, l->crop_width, l->flip);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count)
|
void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
fprintf(fp, "[maxpool]\n");
|
fprintf(fp, "[maxpool]\n");
|
||||||
if(count == 0) fprintf(fp, "batch=%d\n"
|
|
||||||
"height=%d\n"
|
|
||||||
"width=%d\n"
|
|
||||||
"channels=%d\n",
|
|
||||||
l->batch,l->h, l->w, l->c);
|
|
||||||
fprintf(fp, "size=%d\nstride=%d\n\n", l->size, l->stride);
|
fprintf(fp, "size=%d\nstride=%d\n\n", l->size, l->stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_normalization_cfg(FILE *fp, normalization_layer *l, network net, int count)
|
void print_normalization_cfg(FILE *fp, normalization_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
fprintf(fp, "[localresponsenormalization]\n");
|
fprintf(fp, "[localresponsenormalization]\n");
|
||||||
if(count == 0) fprintf(fp, "batch=%d\n"
|
|
||||||
"height=%d\n"
|
|
||||||
"width=%d\n"
|
|
||||||
"channels=%d\n",
|
|
||||||
l->batch,l->h, l->w, l->c);
|
|
||||||
fprintf(fp, "size=%d\n"
|
fprintf(fp, "size=%d\n"
|
||||||
"alpha=%g\n"
|
"alpha=%g\n"
|
||||||
"beta=%g\n"
|
"beta=%g\n"
|
||||||
@ -708,7 +539,6 @@ void print_normalization_cfg(FILE *fp, normalization_layer *l, network net, int
|
|||||||
void print_softmax_cfg(FILE *fp, softmax_layer *l, network net, int count)
|
void print_softmax_cfg(FILE *fp, softmax_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
fprintf(fp, "[softmax]\n");
|
fprintf(fp, "[softmax]\n");
|
||||||
if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
|
|
||||||
fprintf(fp, "\n");
|
fprintf(fp, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -722,7 +552,6 @@ void print_detection_cfg(FILE *fp, detection_layer *l, network net, int count)
|
|||||||
void print_cost_cfg(FILE *fp, cost_layer *l, network net, int count)
|
void print_cost_cfg(FILE *fp, cost_layer *l, network net, int count)
|
||||||
{
|
{
|
||||||
fprintf(fp, "[cost]\ntype=%s\n", get_cost_string(l->type));
|
fprintf(fp, "[cost]\ntype=%s\n", get_cost_string(l->type));
|
||||||
if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
|
|
||||||
fprintf(fp, "\n");
|
fprintf(fp, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -741,33 +570,33 @@ void save_weights(network net, char *filename)
|
|||||||
for(i = 0; i < net.n; ++i){
|
for(i = 0; i < net.n; ++i){
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *) net.layers[i];
|
convolutional_layer layer = *(convolutional_layer *) net.layers[i];
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0){
|
if(gpu_index >= 0){
|
||||||
pull_convolutional_layer(layer);
|
pull_convolutional_layer(layer);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
int num = layer.n*layer.c*layer.size*layer.size;
|
int num = layer.n*layer.c*layer.size*layer.size;
|
||||||
fwrite(layer.biases, sizeof(float), layer.n, fp);
|
fwrite(layer.biases, sizeof(float), layer.n, fp);
|
||||||
fwrite(layer.filters, sizeof(float), num, fp);
|
fwrite(layer.filters, sizeof(float), num, fp);
|
||||||
}
|
}
|
||||||
if(net.types[i] == DECONVOLUTIONAL){
|
if(net.types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *) net.layers[i];
|
deconvolutional_layer layer = *(deconvolutional_layer *) net.layers[i];
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0){
|
if(gpu_index >= 0){
|
||||||
pull_deconvolutional_layer(layer);
|
pull_deconvolutional_layer(layer);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
int num = layer.n*layer.c*layer.size*layer.size;
|
int num = layer.n*layer.c*layer.size*layer.size;
|
||||||
fwrite(layer.biases, sizeof(float), layer.n, fp);
|
fwrite(layer.biases, sizeof(float), layer.n, fp);
|
||||||
fwrite(layer.filters, sizeof(float), num, fp);
|
fwrite(layer.filters, sizeof(float), num, fp);
|
||||||
}
|
}
|
||||||
if(net.types[i] == CONNECTED){
|
if(net.types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *) net.layers[i];
|
connected_layer layer = *(connected_layer *) net.layers[i];
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0){
|
if(gpu_index >= 0){
|
||||||
pull_connected_layer(layer);
|
pull_connected_layer(layer);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
fwrite(layer.biases, sizeof(float), layer.outputs, fp);
|
fwrite(layer.biases, sizeof(float), layer.outputs, fp);
|
||||||
fwrite(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp);
|
fwrite(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp);
|
||||||
}
|
}
|
||||||
@ -785,8 +614,7 @@ void load_weights_upto(network *net, char *filename, int cutoff)
|
|||||||
fread(&net->momentum, sizeof(float), 1, fp);
|
fread(&net->momentum, sizeof(float), 1, fp);
|
||||||
fread(&net->decay, sizeof(float), 1, fp);
|
fread(&net->decay, sizeof(float), 1, fp);
|
||||||
fread(&net->seen, sizeof(int), 1, fp);
|
fread(&net->seen, sizeof(int), 1, fp);
|
||||||
set_learning_network(net, net->learning_rate, net->momentum, net->decay);
|
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < net->n && i < cutoff; ++i){
|
for(i = 0; i < net->n && i < cutoff; ++i){
|
||||||
if(net->types[i] == CONVOLUTIONAL){
|
if(net->types[i] == CONVOLUTIONAL){
|
||||||
@ -794,32 +622,32 @@ void load_weights_upto(network *net, char *filename, int cutoff)
|
|||||||
int num = layer.n*layer.c*layer.size*layer.size;
|
int num = layer.n*layer.c*layer.size*layer.size;
|
||||||
fread(layer.biases, sizeof(float), layer.n, fp);
|
fread(layer.biases, sizeof(float), layer.n, fp);
|
||||||
fread(layer.filters, sizeof(float), num, fp);
|
fread(layer.filters, sizeof(float), num, fp);
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0){
|
if(gpu_index >= 0){
|
||||||
push_convolutional_layer(layer);
|
push_convolutional_layer(layer);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if(net->types[i] == DECONVOLUTIONAL){
|
if(net->types[i] == DECONVOLUTIONAL){
|
||||||
deconvolutional_layer layer = *(deconvolutional_layer *) net->layers[i];
|
deconvolutional_layer layer = *(deconvolutional_layer *) net->layers[i];
|
||||||
int num = layer.n*layer.c*layer.size*layer.size;
|
int num = layer.n*layer.c*layer.size*layer.size;
|
||||||
fread(layer.biases, sizeof(float), layer.n, fp);
|
fread(layer.biases, sizeof(float), layer.n, fp);
|
||||||
fread(layer.filters, sizeof(float), num, fp);
|
fread(layer.filters, sizeof(float), num, fp);
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0){
|
if(gpu_index >= 0){
|
||||||
push_deconvolutional_layer(layer);
|
push_deconvolutional_layer(layer);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if(net->types[i] == CONNECTED){
|
if(net->types[i] == CONNECTED){
|
||||||
connected_layer layer = *(connected_layer *) net->layers[i];
|
connected_layer layer = *(connected_layer *) net->layers[i];
|
||||||
fread(layer.biases, sizeof(float), layer.outputs, fp);
|
fread(layer.biases, sizeof(float), layer.outputs, fp);
|
||||||
fread(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp);
|
fread(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp);
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
if(gpu_index >= 0){
|
if(gpu_index >= 0){
|
||||||
push_connected_layer(layer);
|
push_connected_layer(layer);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
@ -847,8 +675,6 @@ void save_network(network net, char *filename)
|
|||||||
print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i);
|
print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i);
|
||||||
else if(net.types[i] == MAXPOOL)
|
else if(net.types[i] == MAXPOOL)
|
||||||
print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i);
|
print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i);
|
||||||
else if(net.types[i] == FREEWEIGHT)
|
|
||||||
print_freeweight_cfg(fp, (freeweight_layer *)net.layers[i], net, i);
|
|
||||||
else if(net.types[i] == DROPOUT)
|
else if(net.types[i] == DROPOUT)
|
||||||
print_dropout_cfg(fp, (dropout_layer *)net.layers[i], net, i);
|
print_dropout_cfg(fp, (dropout_layer *)net.layers[i], net, i);
|
||||||
else if(net.types[i] == NORMALIZATION)
|
else if(net.types[i] == NORMALIZATION)
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
softmax_layer *make_softmax_layer(int batch, int groups, int inputs)
|
softmax_layer *make_softmax_layer(int batch, int inputs, int groups)
|
||||||
{
|
{
|
||||||
assert(inputs%groups == 0);
|
assert(inputs%groups == 0);
|
||||||
fprintf(stderr, "Softmax Layer: %d inputs\n", inputs);
|
fprintf(stderr, "Softmax Layer: %d inputs\n", inputs);
|
||||||
@ -42,21 +42,21 @@ void softmax_array(float *input, int n, float *output)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_softmax_layer(const softmax_layer layer, float *input)
|
void forward_softmax_layer(const softmax_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int b;
|
int b;
|
||||||
int inputs = layer.inputs / layer.groups;
|
int inputs = layer.inputs / layer.groups;
|
||||||
int batch = layer.batch * layer.groups;
|
int batch = layer.batch * layer.groups;
|
||||||
for(b = 0; b < batch; ++b){
|
for(b = 0; b < batch; ++b){
|
||||||
softmax_array(input+b*inputs, inputs, layer.output+b*inputs);
|
softmax_array(state.input+b*inputs, inputs, layer.output+b*inputs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void backward_softmax_layer(const softmax_layer layer, float *delta)
|
void backward_softmax_layer(const softmax_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < layer.inputs*layer.batch; ++i){
|
for(i = 0; i < layer.inputs*layer.batch; ++i){
|
||||||
delta[i] = layer.delta[i];
|
state.delta[i] = layer.delta[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#ifndef SOFTMAX_LAYER_H
|
#ifndef SOFTMAX_LAYER_H
|
||||||
#define SOFTMAX_LAYER_H
|
#define SOFTMAX_LAYER_H
|
||||||
|
#include "params.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int inputs;
|
int inputs;
|
||||||
@ -14,14 +15,14 @@ typedef struct {
|
|||||||
} softmax_layer;
|
} softmax_layer;
|
||||||
|
|
||||||
void softmax_array(float *input, int n, float *output);
|
void softmax_array(float *input, int n, float *output);
|
||||||
softmax_layer *make_softmax_layer(int batch, int groups, int inputs);
|
softmax_layer *make_softmax_layer(int batch, int inputs, int groups);
|
||||||
void forward_softmax_layer(const softmax_layer layer, float *input);
|
void forward_softmax_layer(const softmax_layer layer, network_state state);
|
||||||
void backward_softmax_layer(const softmax_layer layer, float *delta);
|
void backward_softmax_layer(const softmax_layer layer, network_state state);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void pull_softmax_layer_output(const softmax_layer layer);
|
void pull_softmax_layer_output(const softmax_layer layer);
|
||||||
void forward_softmax_layer_gpu(const softmax_layer layer, float *input);
|
void forward_softmax_layer_gpu(const softmax_layer layer, network_state state);
|
||||||
void backward_softmax_layer_gpu(const softmax_layer layer, float *delta);
|
void backward_softmax_layer_gpu(const softmax_layer layer, network_state state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -32,23 +32,17 @@ extern "C" void pull_softmax_layer_output(const softmax_layer layer)
|
|||||||
cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch);
|
cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, float *input)
|
extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
int inputs = layer.inputs / layer.groups;
|
int inputs = layer.inputs / layer.groups;
|
||||||
int batch = layer.batch * layer.groups;
|
int batch = layer.batch * layer.groups;
|
||||||
forward_softmax_layer_kernel<<<cuda_gridsize(batch), BLOCK>>>(inputs, batch, input, layer.output_gpu);
|
forward_softmax_layer_kernel<<<cuda_gridsize(batch), BLOCK>>>(inputs, batch, state.input, layer.output_gpu);
|
||||||
check_error(cudaPeekAtLastError());
|
check_error(cudaPeekAtLastError());
|
||||||
|
|
||||||
/*
|
|
||||||
cl_read_array(layer.output_cl, layer.output, layer.inputs*layer.batch);
|
|
||||||
int z;
|
|
||||||
for(z = 0; z < layer.inputs*layer.batch; ++z) printf("%f,",layer.output[z]);
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, float *delta)
|
extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, delta, 1);
|
copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, state.delta, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This is if you want softmax w/o log-loss classification. You probably don't.
|
/* This is if you want softmax w/o log-loss classification. You probably don't.
|
||||||
|
Loading…
Reference in New Issue
Block a user