Per-image randomness in crop layer

This commit is contained in:
Joseph Redmon 2015-04-17 12:32:54 -07:00
parent 47528e37cf
commit f199fd3b64
8 changed files with 72 additions and 45 deletions

View File

@ -1,4 +1,4 @@
GPU=0 GPU=1
DEBUG=0 DEBUG=0
ARCH= -arch=sm_52 ARCH= -arch=sm_52

View File

@ -10,7 +10,7 @@ image get_crop_image(crop_layer layer)
return float_to_image(w,h,c,layer.output); return float_to_image(w,h,c,layer.output);
} }
crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle) crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
{ {
fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
crop_layer *layer = calloc(1, sizeof(crop_layer)); crop_layer *layer = calloc(1, sizeof(crop_layer));
@ -20,11 +20,14 @@ crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int
layer->c = c; layer->c = c;
layer->flip = flip; layer->flip = flip;
layer->angle = angle; layer->angle = angle;
layer->saturation = saturation;
layer->exposure = exposure;
layer->crop_width = crop_width; layer->crop_width = crop_width;
layer->crop_height = crop_height; layer->crop_height = crop_height;
layer->output = calloc(crop_width*crop_height * c*batch, sizeof(float)); layer->output = calloc(crop_width*crop_height * c*batch, sizeof(float));
#ifdef GPU #ifdef GPU
layer->output_gpu = cuda_make_array(layer->output, crop_width*crop_height*c*batch); layer->output_gpu = cuda_make_array(layer->output, crop_width*crop_height*c*batch);
layer->rand_gpu = cuda_make_array(0, layer->batch*8);
#endif #endif
return layer; return layer;
} }

View File

@ -11,14 +11,17 @@ typedef struct {
int crop_height; int crop_height;
int flip; int flip;
float angle; float angle;
float saturation;
float exposure;
float *output; float *output;
#ifdef GPU #ifdef GPU
float *output_gpu; float *output_gpu;
float *rand_gpu;
#endif #endif
} crop_layer; } crop_layer;
image get_crop_image(crop_layer layer); image get_crop_image(crop_layer layer);
crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle); crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
void forward_crop_layer(const crop_layer layer, network_state state); void forward_crop_layer(const crop_layer layer, network_state state);
#ifdef GPU #ifdef GPU

View File

@ -93,7 +93,7 @@ __device__ float billinear_interpolate_kernel(float *image, int w, int h, float
return val; return val;
} }
__global__ void levels_image_kernel(float *image, int batch, int w, int h, float saturation, float exposure, float translate, float scale) __global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale)
{ {
int size = batch * w * h; int size = batch * w * h;
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@ -102,22 +102,34 @@ __global__ void levels_image_kernel(float *image, int batch, int w, int h, float
id /= w; id /= w;
int y = id % h; int y = id % h;
id /= h; id /= h;
float r0 = rand[8*id + 0];
float r1 = rand[8*id + 1];
float r2 = rand[8*id + 2];
float r3 = rand[8*id + 3];
saturation = r0*(saturation - 1) + 1;
saturation = (r1 > .5) ? 1./saturation : saturation;
exposure = r2*(exposure - 1) + 1;
exposure = (r3 > .5) ? 1./exposure : exposure;
size_t offset = id * h * w * 3; size_t offset = id * h * w * 3;
image += offset; image += offset;
float r = image[x + w*(y + h*2)]; float r = image[x + w*(y + h*2)];
float g = image[x + w*(y + h*1)]; float g = image[x + w*(y + h*1)];
float b = image[x + w*(y + h*0)]; float b = image[x + w*(y + h*0)];
float3 rgb = make_float3(r,g,b); float3 rgb = make_float3(r,g,b);
float3 hsv = rgb_to_hsv_kernel(rgb); if(train){
hsv.y *= saturation; float3 hsv = rgb_to_hsv_kernel(rgb);
hsv.z *= exposure; hsv.y *= saturation;
rgb = hsv_to_rgb_kernel(hsv); hsv.z *= exposure;
rgb = hsv_to_rgb_kernel(hsv);
}
image[x + w*(y + h*2)] = rgb.x*scale + translate; image[x + w*(y + h*2)] = rgb.x*scale + translate;
image[x + w*(y + h*1)] = rgb.y*scale + translate; image[x + w*(y + h*1)] = rgb.y*scale + translate;
image[x + w*(y + h*0)] = rgb.z*scale + translate; image[x + w*(y + h*0)] = rgb.z*scale + translate;
} }
__global__ void forward_crop_layer_kernel(float *input, int size, int c, int h, int w, int crop_height, int crop_width, int dh, int dw, int flip, float angle, float *output) __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output)
{ {
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if(id >= size) return; if(id >= size) return;
@ -134,10 +146,26 @@ __global__ void forward_crop_layer_kernel(float *input, int size, int c, int h,
id /= c; id /= c;
int b = id; int b = id;
float r4 = rand[8*b + 4];
float r5 = rand[8*b + 5];
float r6 = rand[8*b + 6];
float r7 = rand[8*b + 7];
float dw = (w - crop_width)*r4;
float dh = (h - crop_height)*r5;
flip = (flip && (r6 > .5));
angle = 2*angle*r7 - angle;
if(!train){
dw = (w - crop_width)/2.;
dh = (h - crop_height)/2.;
flip = 0;
angle = 0;
}
input += w*h*c*b; input += w*h*c*b;
int x = (flip) ? w - dw - j - 1 : j + dw; float x = (flip) ? w - dw - j - 1 : j + dw;
int y = i + dh; float y = i + dh;
float rx = cos(angle)*(x-cx) - sin(angle)*(y-cy) + cx; float rx = cos(angle)*(x-cx) - sin(angle)*(y-cy) + cx;
float ry = sin(angle)*(x-cx) + cos(angle)*(y-cy) + cy; float ry = sin(angle)*(x-cx) + cos(angle)*(y-cy) + cy;
@ -147,38 +175,21 @@ __global__ void forward_crop_layer_kernel(float *input, int size, int c, int h,
extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state) extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
{ {
int flip = (layer.flip && rand()%2); cuda_random(layer.rand_gpu, layer.batch*8);
int dh = rand()%(layer.h - layer.crop_height + 1);
int dw = rand()%(layer.w - layer.crop_width + 1);
float radians = layer.angle*3.14159/180.;
float angle = 2*radians*rand_uniform() - radians;
float saturation = rand_uniform() + 1; float radians = layer.angle*3.14159/180.;
if(rand_uniform() > .5) saturation = 1./saturation;
float exposure = rand_uniform() + 1;
if(rand_uniform() > .5) exposure = 1./exposure;
float scale = 2; float scale = 2;
float translate = -1; float translate = -1;
if(!state.train){
angle = 0;
flip = 0;
dh = (layer.h - layer.crop_height)/2;
dw = (layer.w - layer.crop_width)/2;
saturation = 1;
exposure = 1;
}
int size = layer.batch * layer.w * layer.h; int size = layer.batch * layer.w * layer.h;
levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.batch, layer.w, layer.h, saturation, exposure, translate, scale); levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, layer.batch, layer.w, layer.h, state.train, layer.saturation, layer.exposure, translate, scale);
check_error(cudaPeekAtLastError()); check_error(cudaPeekAtLastError());
size = layer.batch*layer.c*layer.crop_width*layer.crop_height; size = layer.batch*layer.c*layer.crop_width*layer.crop_height;
forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.c, layer.h, layer.w, forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.crop_height, layer.crop_width, state.train, layer.flip, radians, layer.output_gpu);
layer.crop_height, layer.crop_width, dh, dw, flip, angle, layer.output_gpu);
check_error(cudaPeekAtLastError()); check_error(cudaPeekAtLastError());
/* /*
@ -186,6 +197,14 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch));
image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch));
image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch));
translate_image(im, -translate);
scale_image(im, 1/scale);
translate_image(im2, -translate);
scale_image(im2, 1/scale);
translate_image(im3, -translate);
scale_image(im3, 1/scale);
show_image(im, "cropped"); show_image(im, "cropped");
show_image(im2, "cropped2"); show_image(im2, "cropped2");
show_image(im3, "cropped3"); show_image(im3, "cropped3");

View File

@ -68,7 +68,7 @@ void partial(char *cfgfile, char *weightfile, char *outfile, int max)
if(weightfile){ if(weightfile){
load_weights_upto(&net, weightfile, max); load_weights_upto(&net, weightfile, max);
} }
//net.seen = 0; net.seen = 0;
save_weights(net, outfile); save_weights(net, outfile);
} }

View File

@ -82,6 +82,8 @@ void train_detection(char *cfgfile, char *weightfile)
plist = get_paths("/home/pjreddie/data/imagenet/det.train.list"); plist = get_paths("/home/pjreddie/data/imagenet/det.train.list");
}else{ }else{
plist = get_paths("/home/pjreddie/data/voc/trainall.txt"); plist = get_paths("/home/pjreddie/data/voc/trainall.txt");
//plist = get_paths("/home/pjreddie/data/coco/trainval.txt");
//plist = get_paths("/home/pjreddie/data/voc/all2007-2012.txt");
} }
paths = (char **)list_to_array(plist); paths = (char **)list_to_array(plist);
pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer); pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer);
@ -94,13 +96,11 @@ void train_detection(char *cfgfile, char *weightfile)
load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer); load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer);
/* /*
image im = float_to_image(net.w, net.h, 3, train.X.vals[114]); image im = float_to_image(net.w, net.h, 3, train.X.vals[114]);
image copy = copy_image(im); image copy = copy_image(im);
translate_image(copy, 1); draw_detection(copy, train.y.vals[114], 7);
scale_image(copy, .5); free_image(copy);
draw_detection(copy, train.y.vals[114], 7); */
free_image(copy);
*/
printf("Loaded: %lf seconds\n", sec(clock()-time)); printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock(); time=clock();

View File

@ -182,8 +182,8 @@ void show_image(image p, char *name)
} }
} }
free_image(copy); free_image(copy);
if(disp->height < 500 || disp->width < 500 || disp->height > 1000){ if(disp->height < 448 || disp->width < 448 || disp->height > 1000){
int w = 500; int w = 448;
int h = w*p.h/p.w; int h = w*p.h/p.w;
if(h > 1000){ if(h > 1000){
h = 1000; h = 1000;
@ -191,7 +191,7 @@ void show_image(image p, char *name)
} }
IplImage *buffer = disp; IplImage *buffer = disp;
disp = cvCreateImage(cvSize(w, h), buffer->depth, buffer->nChannels); disp = cvCreateImage(cvSize(w, h), buffer->depth, buffer->nChannels);
cvResize(buffer, disp, CV_INTER_NN); cvResize(buffer, disp, CV_INTER_LINEAR);
cvReleaseImage(&buffer); cvReleaseImage(&buffer);
} }
cvShowImage(buff, disp); cvShowImage(buff, disp);

View File

@ -187,6 +187,8 @@ crop_layer *parse_crop(list *options, size_params params)
int crop_width = option_find_int(options, "crop_width",1); int crop_width = option_find_int(options, "crop_width",1);
int flip = option_find_int(options, "flip",0); int flip = option_find_int(options, "flip",0);
float angle = option_find_float(options, "angle",0); float angle = option_find_float(options, "angle",0);
float saturation = option_find_float(options, "saturation",1);
float exposure = option_find_float(options, "exposure",1);
int batch,h,w,c; int batch,h,w,c;
h = params.h; h = params.h;
@ -195,7 +197,7 @@ crop_layer *parse_crop(list *options, size_params params)
batch=params.batch; batch=params.batch;
if(!(h && w && c)) error("Layer before crop layer must output image."); if(!(h && w && c)) error("Layer before crop layer must output image.");
crop_layer *layer = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle); crop_layer *layer = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure);
option_unused(options); option_unused(options);
return layer; return layer;
} }