Fixed batch handling in the convolutional layer (per-image im2col/col2im and GEMM instead of batch-fused calls)

This commit is contained in:
Joseph Redmon 2014-07-17 10:14:59 -07:00
parent 1b94df24fd
commit 076009ebe3
6 changed files with 83 additions and 44 deletions

View File

@ -48,11 +48,10 @@ void test_convolve_matrix()
image edge = make_image((dog.h-size)/stride+1, (dog.w-size)/stride+1, n); image edge = make_image((dog.h-size)/stride+1, (dog.w-size)/stride+1, n);
int i; int i;
clock_t start = clock(), end; clock_t start = clock(), end;
for(i = 0; i < 1000; ++i){ for(i = 0; i < 1000; ++i){
im2col_cpu(dog.data, 1, dog.c, dog.h, dog.w, size, stride, 0, matrix); im2col_cpu(dog.data, dog.c, dog.h, dog.w, size, stride, 0, matrix);
gemm(0,0,n,mw,mh,1,filters,mh,matrix,mw,1,edge.data,mw); gemm(0,0,n,mw,mh,1,filters,mh,matrix,mw,1,edge.data,mw);
} }
end = clock(); end = clock();
@ -317,8 +316,8 @@ void test_nist()
clock_t start = clock(), end; clock_t start = clock(), end;
float loss = train_network_sgd(net, train, iters, lr, momentum, decay); float loss = train_network_sgd(net, train, iters, lr, momentum, decay);
end = clock(); end = clock();
//float test_acc = network_accuracy(net, test); float test_acc = network_accuracy(net, test);
float test_acc = 0; //float test_acc = 0;
printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay); printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
//printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay); //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay);
@ -434,7 +433,7 @@ void test_im2row()
float *matrix = calloc(msize, sizeof(float)); float *matrix = calloc(msize, sizeof(float));
int i; int i;
for(i = 0; i < 1000; ++i){ for(i = 0; i < 1000; ++i){
im2col_cpu(test.data, 1, c, h, w, size, stride, 0, matrix); im2col_cpu(test.data, c, h, w, size, stride, 0, matrix);
//image render = float_to_image(mh, mw, mc, matrix); //image render = float_to_image(mh, mw, mc, matrix);
} }
} }

View File

@ -10,10 +10,10 @@ inline void col2im_set_pixel(float *im, int height, int width, int channels,
} }
//This one might be too, can't remember. //This one might be too, can't remember.
void col2im_cpu(float* data_col, void col2im_cpu(float* data_col,
const int batch, const int channels, const int height, const int width, const int channels, const int height, const int width,
const int ksize, const int stride, int pad, float* data_im) const int ksize, const int stride, int pad, float* data_im)
{ {
int c,h,w,b; int c,h,w;
int height_col = (height - ksize) / stride + 1; int height_col = (height - ksize) / stride + 1;
int width_col = (width - ksize) / stride + 1; int width_col = (width - ksize) / stride + 1;
if (pad){ if (pad){
@ -22,9 +22,6 @@ void col2im_cpu(float* data_col,
pad = ksize/2; pad = ksize/2;
} }
int channels_col = channels * ksize * ksize; int channels_col = channels * ksize * ksize;
int im_size = height*width*channels;
int col_size = height_col*width_col*channels_col;
for (b = 0; b < batch; ++b) {
for (c = 0; c < channels_col; ++c) { for (c = 0; c < channels_col; ++c) {
int w_offset = c % ksize; int w_offset = c % ksize;
int h_offset = (c / ksize) % ksize; int h_offset = (c / ksize) % ksize;
@ -39,9 +36,6 @@ void col2im_cpu(float* data_col,
} }
} }
} }
data_im += im_size;
data_col+= col_size;
}
} }

View File

@ -79,7 +79,7 @@ convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, in
layer->bias_updates_cl = cl_make_array(layer->bias_updates, n); layer->bias_updates_cl = cl_make_array(layer->bias_updates, n);
layer->bias_momentum_cl = cl_make_array(layer->bias_momentum, n); layer->bias_momentum_cl = cl_make_array(layer->bias_momentum, n);
layer->col_image_cl = cl_make_array(layer->col_image, layer->batch*out_h*out_w*size*size*c); layer->col_image_cl = cl_make_array(layer->col_image, layer.batch*out_h*out_w*size*size*c);
layer->delta_cl = cl_make_array(layer->delta, layer->batch*out_h*out_w*n); layer->delta_cl = cl_make_array(layer->delta, layer->batch*out_h*out_w*n);
layer->output_cl = cl_make_array(layer->output, layer->batch*out_h*out_w*n); layer->output_cl = cl_make_array(layer->output, layer->batch*out_h*out_w*n);
#endif #endif
@ -124,24 +124,32 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
{ {
int out_h = convolutional_out_height(layer); int out_h = convolutional_out_height(layer);
int out_w = convolutional_out_width(layer); int out_w = convolutional_out_width(layer);
int i;
bias_output(layer);
int m = layer.n; int m = layer.n;
int k = layer.size*layer.size*layer.c; int k = layer.size*layer.size*layer.c;
int n = out_h*out_w*layer.batch; int n = out_h*out_w;
float *a = layer.filters; float *a = layer.filters;
float *b = layer.col_image; float *b = layer.col_image;
float *c = layer.output; float *c = layer.output;
im2col_cpu(in, layer.batch, layer.c, layer.h, layer.w,
for(i = 0; i < layer.batch; ++i){
im2col_cpu(in, layer.c, layer.h, layer.w,
layer.size, layer.stride, layer.pad, b); layer.size, layer.stride, layer.pad, b);
bias_output(layer);
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
c += n*m;
in += layer.h*layer.w*layer.c;
b += k*n;
}
/* /*
int i; int i;
for(i = 0; i < m*n; ++i) printf("%f, ", layer.output[i]); for(i = 0; i < m*n; ++i) printf("%f, ", layer.output[i]);
printf("\n"); printf("\n");
*/ */
activate_array(layer.output, m*n, layer.activation, 0.); activate_array(layer.output, m*n*layer.batch, layer.activation, 0.);
} }
#ifdef GPU #ifdef GPU
@ -178,35 +186,42 @@ void learn_bias_convolutional_layer(convolutional_layer layer)
void backward_convolutional_layer(convolutional_layer layer, float *delta) void backward_convolutional_layer(convolutional_layer layer, float *delta)
{ {
int i;
int m = layer.n; int m = layer.n;
int n = layer.size*layer.size*layer.c; int n = layer.size*layer.size*layer.c;
int k = convolutional_out_height(layer)* int k = convolutional_out_height(layer)*
convolutional_out_width(layer)* convolutional_out_width(layer);
layer.batch; gradient_array(layer.output, m*k*layer.batch, layer.activation, layer.delta);
gradient_array(layer.output, m*k, layer.activation, layer.delta);
learn_bias_convolutional_layer(layer); learn_bias_convolutional_layer(layer);
float *a = layer.delta; float *a = layer.delta;
float *b = layer.col_image; float *b = layer.col_image;
float *c = layer.filter_updates; float *c = layer.filter_updates;
for(i = 0; i < layer.batch; ++i){
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
a += m*k;
b += k*n;
}
if(delta){ if(delta){
m = layer.size*layer.size*layer.c; m = layer.size*layer.size*layer.c;
k = layer.n; k = layer.n;
n = convolutional_out_height(layer)* n = convolutional_out_height(layer)*
convolutional_out_width(layer)* convolutional_out_width(layer);
layer.batch;
a = layer.filters; a = layer.filters;
b = layer.delta; b = layer.delta;
c = layer.col_image; c = layer.col_image;
gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);
memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
col2im_cpu(c, layer.batch, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta);
for(i = 0; i < layer.batch; ++i){
gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);
col2im_cpu(c, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta);
c += k*n;
delta += layer.h*layer.w*layer.c;
}
} }
} }

View File

@ -14,7 +14,7 @@ inline float im2col_get_pixel(float *im, int height, int width, int channels,
//From Berkeley Vision's Caffe! //From Berkeley Vision's Caffe!
//https://github.com/BVLC/caffe/blob/master/LICENSE //https://github.com/BVLC/caffe/blob/master/LICENSE
void im2col_cpu(float* data_im, void im2col_cpu_batch(float* data_im,
const int batch, const int channels, const int height, const int width, const int batch, const int channels, const int height, const int width,
const int ksize, const int stride, int pad, float* data_col) const int ksize, const int stride, int pad, float* data_col)
{ {
@ -49,6 +49,37 @@ void im2col_cpu(float* data_im,
} }
} }
//From Berkeley Vision's Caffe!
//https://github.com/BVLC/caffe/blob/master/LICENSE
//Unrolls a single image (channels x height x width) into a column matrix
//so convolution can be computed as one matrix multiply. Each output row
//corresponds to one (channel, ky, kx) filter tap; each output column to
//one spatial output position. Writes into caller-provided data_col.
void im2col_cpu(float* data_im,
     const int channels,  const int height,  const int width,
     const int ksize,  const int stride, int pad, float* data_col)
{
    // Output spatial extent without padding.
    int height_col = (height - ksize) / stride + 1;
    int width_col = (width - ksize) / stride + 1;
    if (pad){
        // "Same"-style padding: keep spatial size / stride, pad by half kernel.
        height_col = 1 + (height-1) / stride;
        width_col = 1 + (width-1) / stride;
        pad = ksize/2;
    }
    int channels_col = channels * ksize * ksize;
    int col;
    for (col = 0; col < channels_col; ++col) {
        // Decompose the row index into (kernel x, kernel y, source channel).
        int w_off = col % ksize;
        int h_off = (col / ksize) % ksize;
        int chan  = col / ksize / ksize;
        int y, x;
        for (y = 0; y < height_col; ++y) {
            for (x = 0; x < width_col; ++x) {
                int src_row = h_off + y * stride;
                int src_col = w_off + x * stride;
                int dst = (col * height_col + y) * width_col + x;
                // im2col_get_pixel handles the pad offset and out-of-bounds reads.
                data_col[dst] = im2col_get_pixel(data_im, height, width, channels,
                        src_row, src_col, chan, pad);
            }
        }
    }
}
#ifdef GPU #ifdef GPU

View File

@ -26,11 +26,11 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
#endif #endif
void im2col_cpu(float* data_im, void im2col_cpu(float* data_im,
const int batch, const int channels, const int height, const int width, const int channels, const int height, const int width,
const int ksize, const int stride, int pad, float* data_col); const int ksize, const int stride, int pad, float* data_col);
void col2im_cpu(float* data_col, void col2im_cpu(float* data_col,
const int batch, const int channels, const int height, const int width, const int channels, const int height, const int width,
const int ksize, const int stride, int pad, float* data_im); const int ksize, const int stride, int pad, float* data_im);
void test_blas(); void test_blas();

View File

@ -274,7 +274,7 @@ float calculate_error_network(network net, float *truth)
//printf("%5.2f %5.2f, ", out[i], truth[i]); //printf("%5.2f %5.2f, ", out[i], truth[i]);
//if(i == get_network_output_size(net)) printf("\n"); //if(i == get_network_output_size(net)) printf("\n");
delta[i] = truth[i] - out[i]; delta[i] = truth[i] - out[i];
//printf("%f, ", delta[i]); //printf("%.10f, ", out[i]);
sum += delta[i]*delta[i]; sum += delta[i]*delta[i];
} }
//printf("\n"); //printf("\n");