Stable place to commit

This commit is contained in:
Joseph Redmon 2015-02-04 12:41:20 -08:00
parent 0f1a31648c
commit bfffadc755
9 changed files with 72 additions and 22 deletions

View File

@ -43,6 +43,7 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVA
for(i = 0; i < outputs; ++i){ for(i = 0; i < outputs; ++i){
layer->biases[i] = scale; layer->biases[i] = scale;
// layer->biases[i] = 1;
} }
#ifdef GPU #ifdef GPU
@ -113,9 +114,10 @@ void forward_connected_layer(connected_layer layer, float *input)
void backward_connected_layer(connected_layer layer, float *input, float *delta) void backward_connected_layer(connected_layer layer, float *input, float *delta)
{ {
int i; int i;
float alpha = 1./layer.batch;
gradient_array(layer.output, layer.outputs*layer.batch, layer.activation, layer.delta); gradient_array(layer.output, layer.outputs*layer.batch, layer.activation, layer.delta);
for(i = 0; i < layer.batch; ++i){ for(i = 0; i < layer.batch; ++i){
axpy_cpu(layer.outputs, 1, layer.delta + i*layer.outputs, 1, layer.bias_updates, 1); axpy_cpu(layer.outputs, alpha, layer.delta + i*layer.outputs, 1, layer.bias_updates, 1);
} }
int m = layer.inputs; int m = layer.inputs;
int k = layer.batch; int k = layer.batch;
@ -123,7 +125,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
float *a = input; float *a = input;
float *b = layer.delta; float *b = layer.delta;
float *c = layer.weight_updates; float *c = layer.weight_updates;
gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); gemm(1,0,m,n,k,alpha,a,m,b,n,1,c,n);
m = layer.batch; m = layer.batch;
k = layer.outputs; k = layer.outputs;
@ -156,13 +158,18 @@ void push_connected_layer(connected_layer layer)
void update_connected_layer_gpu(connected_layer layer) void update_connected_layer_gpu(connected_layer layer)
{ {
/*
cuda_pull_array(layer.weights_gpu, layer.weights, layer.inputs*layer.outputs);
cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.inputs*layer.outputs);
printf("Weights: %f updates: %f\n", mag_array(layer.weights, layer.inputs*layer.outputs), layer.learning_rate*mag_array(layer.weight_updates, layer.inputs*layer.outputs));
*/
axpy_ongpu(layer.outputs, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); axpy_ongpu(layer.outputs, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
scal_ongpu(layer.outputs, layer.momentum, layer.bias_updates_gpu, 1); scal_ongpu(layer.outputs, layer.momentum, layer.bias_updates_gpu, 1);
axpy_ongpu(layer.inputs*layer.outputs, -layer.decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); axpy_ongpu(layer.inputs*layer.outputs, -layer.decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
axpy_ongpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); axpy_ongpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
scal_ongpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates_gpu, 1); scal_ongpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates_gpu, 1);
//pull_connected_layer(layer);
} }
void forward_connected_layer_gpu(connected_layer layer, float * input) void forward_connected_layer_gpu(connected_layer layer, float * input)
@ -183,10 +190,11 @@ void forward_connected_layer_gpu(connected_layer layer, float * input)
void backward_connected_layer_gpu(connected_layer layer, float * input, float * delta) void backward_connected_layer_gpu(connected_layer layer, float * input, float * delta)
{ {
float alpha = 1./layer.batch;
int i; int i;
gradient_array_ongpu(layer.output_gpu, layer.outputs*layer.batch, layer.activation, layer.delta_gpu); gradient_array_ongpu(layer.output_gpu, layer.outputs*layer.batch, layer.activation, layer.delta_gpu);
for(i = 0; i < layer.batch; ++i){ for(i = 0; i < layer.batch; ++i){
axpy_ongpu_offset(layer.outputs, 1, layer.delta_gpu, i*layer.outputs, 1, layer.bias_updates_gpu, 0, 1); axpy_ongpu_offset(layer.outputs, alpha, layer.delta_gpu, i*layer.outputs, 1, layer.bias_updates_gpu, 0, 1);
} }
int m = layer.inputs; int m = layer.inputs;
int k = layer.batch; int k = layer.batch;
@ -194,7 +202,7 @@ void backward_connected_layer_gpu(connected_layer layer, float * input, float *
float * a = input; float * a = input;
float * b = layer.delta_gpu; float * b = layer.delta_gpu;
float * c = layer.weight_updates_gpu; float * c = layer.weight_updates_gpu;
gemm_ongpu(1,0,m,n,k,1,a,m,b,n,1,c,n); gemm_ongpu(1,0,m,n,k,alpha,a,m,b,n,1,c,n);
m = layer.batch; m = layer.batch;
k = layer.outputs; k = layer.outputs;

View File

@ -28,7 +28,7 @@ extern "C" void bias_output_gpu(const convolutional_layer layer)
check_error(cudaPeekAtLastError()); check_error(cudaPeekAtLastError());
} }
__global__ void learn_bias(int batch, int n, int size, float *delta, float *bias_updates) __global__ void learn_bias(int batch, int n, int size, float *delta, float *bias_updates, float scale)
{ {
__shared__ float part[BLOCK]; __shared__ float part[BLOCK];
int i,b; int i,b;
@ -44,15 +44,16 @@ __global__ void learn_bias(int batch, int n, int size, float *delta, float *bias
part[p] = sum; part[p] = sum;
__syncthreads(); __syncthreads();
if(p == 0){ if(p == 0){
for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; for(i = 0; i < BLOCK; ++i) bias_updates[filter] += scale * part[i];
} }
} }
extern "C" void learn_bias_convolutional_layer_ongpu(convolutional_layer layer) extern "C" void learn_bias_convolutional_layer_ongpu(convolutional_layer layer)
{ {
int size = convolutional_out_height(layer)*convolutional_out_width(layer); int size = convolutional_out_height(layer)*convolutional_out_width(layer);
float alpha = 1./layer.batch;
learn_bias<<<layer.n, BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu); learn_bias<<<layer.n, BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu, alpha);
check_error(cudaPeekAtLastError()); check_error(cudaPeekAtLastError());
} }
@ -99,6 +100,7 @@ extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float
extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu) extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu)
{ {
float alpha = 1./layer.batch;
int i; int i;
int m = layer.n; int m = layer.n;
int n = layer.size*layer.size*layer.c; int n = layer.size*layer.size*layer.c;
@ -115,7 +117,7 @@ extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, floa
float * c = layer.filter_updates_gpu; float * c = layer.filter_updates_gpu;
im2col_ongpu(in, i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); im2col_ongpu(in, i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu);
gemm_ongpu(0,1,m,n,k,1,a + i*m*k,k,b,k,1,c,n); gemm_ongpu(0,1,m,n,k,alpha,a + i*m*k,k,b,k,1,c,n);
if(delta_gpu){ if(delta_gpu){
@ -151,12 +153,9 @@ extern "C" void update_convolutional_layer_gpu(convolutional_layer layer)
int size = layer.size*layer.size*layer.c*layer.n; int size = layer.size*layer.size*layer.c*layer.n;
/* /*
cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size); cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size);
cuda_pull_array(layer.filters_gpu, layer.filters, size); cuda_pull_array(layer.filters_gpu, layer.filters, size);
printf("Bias: %f updates: %f\n", mse_array(layer.biases, layer.n), mse_array(layer.bias_updates, layer.n)); printf("Filter: %f updates: %f\n", mag_array(layer.filters, size), layer.learning_rate*mag_array(layer.filter_updates, size));
printf("Filter: %f updates: %f\n", mse_array(layer.filters, layer.n), mse_array(layer.filter_updates, layer.n));
*/ */
axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);

View File

@ -66,11 +66,12 @@ convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, in
layer->biases = calloc(n, sizeof(float)); layer->biases = calloc(n, sizeof(float));
layer->bias_updates = calloc(n, sizeof(float)); layer->bias_updates = calloc(n, sizeof(float));
float scale = 1./sqrt(size*size*c); float scale = 1./sqrt(size*size*c);
//scale = .05; //scale = .01;
for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = scale*rand_normal(); for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = scale*rand_normal();
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
//layer->biases[i] = rand_normal()*scale + scale; //layer->biases[i] = rand_normal()*scale + scale;
layer->biases[i] = scale; layer->biases[i] = scale;
//layer->biases[i] = 1;
} }
int out_h = convolutional_out_height(*layer); int out_h = convolutional_out_height(*layer);
int out_w = convolutional_out_width(*layer); int out_w = convolutional_out_width(*layer);
@ -155,18 +156,20 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
void learn_bias_convolutional_layer(convolutional_layer layer) void learn_bias_convolutional_layer(convolutional_layer layer)
{ {
float alpha = 1./layer.batch;
int i,b; int i,b;
int size = convolutional_out_height(layer) int size = convolutional_out_height(layer)
*convolutional_out_width(layer); *convolutional_out_width(layer);
for(b = 0; b < layer.batch; ++b){ for(b = 0; b < layer.batch; ++b){
for(i = 0; i < layer.n; ++i){ for(i = 0; i < layer.n; ++i){
layer.bias_updates[i] += sum_array(layer.delta+size*(i+b*layer.n), size); layer.bias_updates[i] += alpha * sum_array(layer.delta+size*(i+b*layer.n), size);
} }
} }
} }
void backward_convolutional_layer(convolutional_layer layer, float *in, float *delta) void backward_convolutional_layer(convolutional_layer layer, float *in, float *delta)
{ {
float alpha = 1./layer.batch;
int i; int i;
int m = layer.n; int m = layer.n;
int n = layer.size*layer.size*layer.c; int n = layer.size*layer.size*layer.c;
@ -188,7 +191,7 @@ void backward_convolutional_layer(convolutional_layer layer, float *in, float *d
im2col_cpu(im, layer.c, layer.h, layer.w, im2col_cpu(im, layer.c, layer.h, layer.w,
layer.size, layer.stride, layer.pad, b); layer.size, layer.stride, layer.pad, b);
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
if(delta){ if(delta){
a = layer.filters; a = layer.filters;

View File

@ -206,10 +206,28 @@ void train_imagenet_distributed(char *address)
} }
*/ */
/*
 * Derive a short name from a config-file path.
 *
 * Takes the part of cfgfile that follows its last '/', duplicates it with
 * copy_string(), and then cuts the copy at its first '_' and at the first
 * '.' that remains. For example "cfg/vgg_small.cfg" yields "vgg".
 *
 * cfgfile itself is never written to; only the copy is truncated.
 * NOTE(review): the result comes from copy_string(), so the caller
 * presumably owns it and should free it -- confirm against copy_string.
 */
char *basename(char *cfgfile)
{
    /* Skip any leading directory components. */
    char *slash = strrchr(cfgfile, '/');
    char *name = copy_string(slash ? slash + 1 : cfgfile);

    /* Drop everything from the first underscore onward. */
    char *cut = strchr(name, '_');
    if (cut) *cut = 0;

    /* Drop the extension (or anything else after a remaining dot). */
    cut = strchr(name, '.');
    if (cut) *cut = 0;

    return name;
}
void train_imagenet(char *cfgfile) void train_imagenet(char *cfgfile)
{ {
float avg_loss = 1; float avg_loss = -1;
srand(time(0)); srand(time(0));
char *base = basename(cfgfile);
printf("%s\n", base);
network net = parse_network_cfg(cfgfile); network net = parse_network_cfg(cfgfile);
//test_learn_bias(*(convolutional_layer *)net.layers[1]); //test_learn_bias(*(convolutional_layer *)net.layers[1]);
//set_learning_network(&net, net.learning_rate, 0, net.decay); //set_learning_network(&net, net.learning_rate, 0, net.decay);
@ -235,12 +253,13 @@ void train_imagenet(char *cfgfile)
time=clock(); time=clock();
float loss = train_network(net, train); float loss = train_network(net, train);
net.seen += imgs; net.seen += imgs;
if(avg_loss == -1) avg_loss = loss;
avg_loss = avg_loss*.9 + loss*.1; avg_loss = avg_loss*.9 + loss*.1;
printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
free_data(train); free_data(train);
if(i%100==0){ if(i%100==0){
char buff[256]; char buff[256];
sprintf(buff, "/home/pjreddie/imagenet_backup/vgg_%d.cfg", i); sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.cfg",base, i);
save_network(net, buff); save_network(net, buff);
} }
} }
@ -272,7 +291,6 @@ void validate_imagenet(char *filename)
pthread_join(load_thread, 0); pthread_join(load_thread, 0);
val = buffer; val = buffer;
//normalize_data_rows(val);
num = (i+1)*m/splits - i*m/splits; num = (i+1)*m/splits - i*m/splits;
char **part = paths+(i*m/splits); char **part = paths+(i*m/splits);
@ -312,6 +330,7 @@ void test_detection(char *cfgfile)
void test_init(char *cfgfile) void test_init(char *cfgfile)
{ {
gpu_index = -1;
network net = parse_network_cfg(cfgfile); network net = parse_network_cfg(cfgfile);
set_batch_network(&net, 1); set_batch_network(&net, 1);
srand(2222222); srand(2222222);
@ -345,7 +364,7 @@ void test_init(char *cfgfile)
} }
void test_dog(char *cfgfile) void test_dog(char *cfgfile)
{ {
image im = load_image_color("data/dog.jpg", 224, 224); image im = load_image_color("data/dog.jpg", 256, 256);
translate_image(im, -128); translate_image(im, -128);
print_image(im); print_image(im);
float *X = im.data; float *X = im.data;
@ -377,7 +396,7 @@ void test_imagenet(char *cfgfile)
strtok(filename, "\n"); strtok(filename, "\n");
image im = load_image_color(filename, 256, 256); image im = load_image_color(filename, 256, 256);
translate_image(im, -128); translate_image(im, -128);
//scale_image(im, 1/128.); scale_image(im, 1/128.);
printf("%d %d %d\n", im.h, im.w, im.c); printf("%d %d %d\n", im.h, im.w, im.c);
float *X = im.data; float *X = im.data;
time=clock(); time=clock();

View File

@ -276,6 +276,7 @@ void test_gpu_accuracy(int TA, int TB, int m, int k, int n)
int test_gpu_blas() int test_gpu_blas()
{ {
/*
test_gpu_accuracy(0,0,10,576,75); test_gpu_accuracy(0,0,10,576,75);
test_gpu_accuracy(0,0,17,10,10); test_gpu_accuracy(0,0,17,10,10);
@ -299,6 +300,15 @@ int test_gpu_blas()
time_ongpu(0,0,256,196,2304); time_ongpu(0,0,256,196,2304);
time_ongpu(0,0,128,4096,12544); time_ongpu(0,0,128,4096,12544);
time_ongpu(0,0,128,4096,4096); time_ongpu(0,0,128,4096,4096);
*/
time_ongpu(0,0,64,75,12544);
time_ongpu(0,0,64,75,12544);
time_ongpu(0,0,64,75,12544);
time_ongpu(0,0,64,576,12544);
time_ongpu(0,0,256,2304,784);
time_ongpu(1,1,2304,256,784);
time_ongpu(0,0,512,4608,196);
time_ongpu(1,1,4608,512,196);
return 0; return 0;
} }

View File

@ -133,7 +133,6 @@ void update_network(network net)
} }
else if(net.types[i] == CONNECTED){ else if(net.types[i] == CONNECTED){
connected_layer layer = *(connected_layer *)net.layers[i]; connected_layer layer = *(connected_layer *)net.layers[i];
//secret_update_connected_layer((connected_layer *)net.layers[i]);
update_connected_layer(layer); update_connected_layer(layer);
} }
} }

View File

@ -61,6 +61,7 @@ void forward_network_gpu(network net, float * input, float * truth, int train)
forward_crop_layer_gpu(layer, train, input); forward_crop_layer_gpu(layer, train, input);
input = layer.output_gpu; input = layer.output_gpu;
} }
//cudaDeviceSynchronize();
//printf("Forward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time)); //printf("Forward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time));
} }
} }

View File

@ -262,6 +262,16 @@ void translate_array(float *a, int n, float s)
} }
} }
float mag_array(float *a, int n)
{
int i;
float sum = 0;
for(i = 0; i < n; ++i){
sum += a[i]*a[i];
}
return sqrt(sum);
}
void scale_array(float *a, int n, float s) void scale_array(float *a, int n, float s)
{ {
int i; int i;

View File

@ -28,6 +28,7 @@ float rand_uniform();
float sum_array(float *a, int n); float sum_array(float *a, int n);
float mean_array(float *a, int n); float mean_array(float *a, int n);
float variance_array(float *a, int n); float variance_array(float *a, int n);
float mag_array(float *a, int n);
float **one_hot_encode(float *a, int n, int k); float **one_hot_encode(float *a, int n, int k);
float sec(clock_t clocks); float sec(clock_t clocks);
#endif #endif