diff --git a/Makefile b/Makefile index 0728872b..63cb621b 100644 --- a/Makefile +++ b/Makefile @@ -31,16 +31,17 @@ OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2c endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) +DEPS = $(wildcard src/*.h) Makefile all: $(EXEC) $(EXEC): $(OBJS) $(CC) $(COMMON) $(CFLAGS) $(LDFLAGS) $^ -o $@ -$(OBJDIR)%.o: %.c +$(OBJDIR)%.o: %.c $(DEPS) $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ -$(OBJDIR)%.o: %.cu +$(OBJDIR)%.o: %.cu $(DEPS) $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ .PHONY: clean diff --git a/src/connected_layer.c b/src/connected_layer.c index 4c40fc08..bdab6d84 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -33,7 +33,7 @@ connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVA float scale = 1./sqrt(inputs); for(i = 0; i < inputs*outputs; ++i){ - //layer->weights[i] = scale*rand_normal(); + layer->weights[i] = 2*scale*rand_uniform() - scale; } for(i = 0; i < outputs; ++i){ diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index ade2ac1d..cd357d39 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -61,7 +61,7 @@ convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, in layer->biases = calloc(n, sizeof(float)); layer->bias_updates = calloc(n, sizeof(float)); float scale = 1./sqrt(size*size*c); - for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = scale*rand_normal(); + for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = 2*scale*rand_uniform() - scale; for(i = 0; i < n; ++i){ layer->biases[i] = scale; } diff --git a/src/crop_layer.c b/src/crop_layer.c index 819b7547..bc131514 100644 --- a/src/crop_layer.c +++ b/src/crop_layer.c @@ -10,7 +10,7 @@ image get_crop_image(crop_layer layer) return float_to_image(w,h,c,layer.output); } -crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip) +crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle) { fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); crop_layer *layer = calloc(1, sizeof(crop_layer)); @@ -19,6 +19,7 @@ crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int layer->w = w; layer->c = c; layer->flip = flip; + layer->angle = angle; layer->crop_width = crop_width; layer->crop_height = crop_height; layer->output = calloc(crop_width*crop_height * c*batch, sizeof(float)); diff --git a/src/crop_layer.h b/src/crop_layer.h index 05a511b3..a320f0e4 100644 --- a/src/crop_layer.h +++ b/src/crop_layer.h @@ -10,6 +10,7 @@ typedef struct { int crop_width; int crop_height; int flip; + float angle; float *output; #ifdef GPU float *output_gpu; @@ -17,7 +18,7 @@ typedef struct { } crop_layer; image get_crop_image(crop_layer layer); -crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip); +crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle); void forward_crop_layer(const crop_layer layer, network_state state); #ifdef GPU diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu index ca6bb20e..1d20d786 100644 --- a/src/crop_layer_kernels.cu +++ b/src/crop_layer_kernels.cu @@ -61,7 +61,8 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state) int flip = (layer.flip && rand()%2); int dh = rand()%(layer.h - layer.crop_height + 1); int dw = rand()%(layer.w - layer.crop_width + 1); - float angle = rand_uniform() - .5; + float radians = layer.angle*3.14159/180.; + float angle = 2*radians*rand_uniform() - radians; if(!state.train){ angle = 0; flip = 0; @@ -76,5 +77,12 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state) forward_crop_layer_kernel<<>>(state.input, size, layer.c, layer.h, layer.w, layer.crop_height, layer.crop_width, dh, dw, flip, angle, layer.output_gpu); check_error(cudaPeekAtLastError()); + +/* + cuda_pull_array(layer.output_gpu, layer.output, size); + image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 14*(size/layer.batch)); + show_image(im, "cropped"); + cvWaitKey(0); + */ } diff --git a/src/darknet.c b/src/darknet.c index 0cd6153e..cca5473a 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -93,7 +93,6 @@ void visualize(char *cfgfile, char *weightfile) int main(int argc, char **argv) { - //test_resize(argv[1]); //test_convolutional_layer(); if(argc < 2){ fprintf(stderr, "usage: %s \n", argv[0]); @@ -114,6 +113,8 @@ int main(int argc, char **argv) run_imagenet(argc, argv); } else if (0 == strcmp(argv[1], "detection")){ run_detection(argc, argv); + } else if (0 == strcmp(argv[1], "test")){ + test_resize(argv[2]); } else if (0 == strcmp(argv[1], "captcha")){ run_captcha(argc, argv); } else if (0 == strcmp(argv[1], "change")){ diff --git a/src/data.c b/src/data.c index 1ef2c403..5a6a6022 100644 --- a/src/data.c +++ b/src/data.c @@ -294,6 +294,8 @@ data load_data_detection_jitter_random(int n, char **paths, int m, int classes, d.y = make_matrix(n, k); for(i = 0; i < n; ++i){ image orig = load_image_color(random_paths[i], 0, 0); + translate_image(orig, -128); + scale_image(orig, 1./128); int oh = orig.h; int ow = orig.w; @@ -310,6 +312,13 @@ data load_data_detection_jitter_random(int n, char **paths, int m, int classes, float sx = (float)swidth / ow; float sy = (float)sheight / oh; + + /* + float angle = rand_uniform()*.1 - .05; + image rot = rotate_image(orig, angle); + free_image(orig); + orig = rot; + */ int flip = rand()%2; image cropped = crop_image(orig, pleft, ptop, swidth, sheight); @@ -333,8 +342,6 @@ void *load_detection_thread(void *ptr) printf("Loading data: %d\n", rand()); struct load_args a = *(struct load_args*)ptr; *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.w, a.h, a.num_boxes, a.background); - translate_data_rows(*a.d, -128); - scale_data_rows(*a.d, 1./128); free(ptr); return 0; } @@ -435,7 +442,7 @@ data load_cifar10_data(char *filename) X.vals[i][j] = (double)bytes[j+1]; } } - translate_data_rows(d, -144); + translate_data_rows(d, -128); scale_data_rows(d, 1./128); //normalize_data_rows(d); fclose(fp); @@ -491,7 +498,7 @@ data load_all_cifar10() fclose(fp); } //normalize_data_rows(d); - translate_data_rows(d, -144); + translate_data_rows(d, -128); scale_data_rows(d, 1./128); return d; } diff --git a/src/detection.c b/src/detection.c index 1cf9ef08..fec39803 100644 --- a/src/detection.c +++ b/src/detection.c @@ -93,9 +93,9 @@ void train_detection(char *cfgfile, char *weightfile) load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer); /* - image im = float_to_image(im_dim, im_dim, 3, train.X.vals[114]); + image im = float_to_image(net.w, net.h, 3, train.X.vals[114]); draw_detection(im, train.y.vals[114], 7); -*/ + */ printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); diff --git a/src/image.c b/src/image.c index 49d46477..d3fb61ef 100644 --- a/src/image.c +++ b/src/image.c @@ -150,7 +150,6 @@ image copy_image(image p) return copy; } - void show_image(image p, char *name) { int x,y,k; @@ -317,7 +316,7 @@ image crop_image(image im, int dx, int dy, int w, int h) for(i = 0; i < w; ++i){ int r = j + dy; int c = i + dx; - float val = 128; + float val = 0; if (r >= 0 && r < im.h && c >= 0 && c < im.w) { val = get_pixel(im, c, r, k); } @@ -328,6 +327,54 @@ image crop_image(image im, int dx, int dy, int w, int h) return cropped; } +image grayscale_image(image im) +{ + assert(im.c == 3); + int i, j, k; + image gray = make_image(im.w, im.h, im.c); + float scale[] = {0.114, 0.587, 0.299}; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); + } + } + } + memcpy(gray.data + im.w*im.h*1, gray.data, sizeof(float)*im.w*im.h); + memcpy(gray.data + im.w*im.h*2, gray.data, sizeof(float)*im.w*im.h); + return gray; +} + +image blend_image(image fore, image back, float alpha) +{ + assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); + image blend = make_image(fore.w, fore.h, fore.c); + int i, j, k; + for(k = 0; k < fore.c; ++k){ + for(j = 0; j < fore.h; ++j){ + for(i = 0; i < fore.w; ++i){ + float val = alpha * get_pixel(fore, i, j, k) + + (1 - alpha)* get_pixel(back, i, j, k); + set_pixel(blend, i, j, k, val); + } + } + } + return blend; +} + +image saturate_image(image im, float sat) +{ + image gray = grayscale_image(im); + image blend = blend_image(im, gray, sat); + free_image(gray); + return blend; +} + +image brightness_image(image im, float b) +{ + image bright = make_image(im.w, im.h, im.c); +} + float billinear_interpolate(image im, float x, float y, int c) { int ix = (int) floorf(x); @@ -337,9 +384,9 @@ float billinear_interpolate(image im, float x, float y, int c) float dy = y - iy; float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + - dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + - (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + - dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); return val; } @@ -374,14 +421,22 @@ void test_resize(char *filename) image rot = rotate_image(big, .02); image rot2 = rotate_image(big, 3.14159265/2.); image test = rotate_image(im, .6); + image gray = grayscale_image(im); + image sat = saturate_image(im, 2); + image sat2 = saturate_image(im, .5); show_image(im, "original"); - show_image(small, "smaller"); - show_image(big, "bigger"); - show_image(crop, "crop"); - show_image(crop2, "crop2"); - show_image(rot, "rot"); - show_image(rot2, "rot2"); - show_image(test, "test"); + show_image(gray, "gray"); + show_image(sat, "sat"); + show_image(sat2, "sat2"); + /* + show_image(small, "smaller"); + show_image(big, "bigger"); + show_image(crop, "crop"); + show_image(crop2, "crop2"); + show_image(rot, "rot"); + show_image(rot2, "rot2"); + show_image(test, "test"); + */ cvWaitKey(0); } diff --git a/src/parser.c b/src/parser.c index e4ee17e4..08e0ea12 100644 --- a/src/parser.c +++ b/src/parser.c @@ -186,6 +186,7 @@ crop_layer *parse_crop(list *options, size_params params) int crop_height = option_find_int(options, "crop_height",1); int crop_width = option_find_int(options, "crop_width",1); int flip = option_find_int(options, "flip",0); + float angle = option_find_float(options, "angle",0); int batch,h,w,c; h = params.h; @@ -194,7 +195,7 @@ crop_layer *parse_crop(list *options, size_params params) batch=params.batch; if(!(h && w && c)) error("Layer before crop layer must output image."); - crop_layer *layer = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip); + crop_layer *layer = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle); option_unused(options); return layer; }