diff --git a/Makefile b/Makefile index 8ce68885..116d3bc7 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,8 @@ -GPU=1 -OPENCV=1 +GPU=0 +OPENCV=0 DEBUG=0 ARCH= --gpu-architecture=compute_20 --gpu-code=compute_20 -ARCH= -arch sm_52 VPATH=./src/ EXEC=darknet @@ -11,7 +10,7 @@ OBJDIR=./obj/ CC=gcc NVCC=nvcc -OPTS=-O2 +OPTS=-Ofast LDFLAGS= -lm -pthread -lstdc++ COMMON= -I/usr/local/cuda/include/ CFLAGS=-Wall -Wfatal-errors @@ -35,7 +34,7 @@ CFLAGS+= -DGPU LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand endif -OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o +OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o ifeq ($(GPU), 1) OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o endif diff --git a/src/box.c b/src/box.c index d49be410..6045d9a7 100644 --- a/src/box.c +++ b/src/box.c @@ -85,6 +85,14 @@ float box_iou(box a, box b) return box_intersection(a, b)/box_union(a, b); } +float box_rmse(box a, box b) +{ + return sqrt(pow(a.x-b.x, 2) + + pow(a.y-b.y, 2) + + pow(a.w-b.w, 2) + + pow(a.h-b.h, 2)); +} + dbox dintersect(box a, box b) { float w = overlap(a.x, a.w, b.x, b.w); @@ -211,16 +219,16 @@ dbox diou(box a, box b) return dd; } -void do_nms(box *boxes, float **probs, int num_boxes, int classes, float thresh) +void do_nms(box *boxes, float **probs, int total, int classes, float thresh) { int i, j, k; - for(i = 0; i < num_boxes*num_boxes; ++i){ + for(i = 0; i < total; ++i){ int any = 0; for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); if(!any) { continue; } - for(j = i+1; j < num_boxes*num_boxes; ++j){ + for(j = i+1; j < total; ++j){ if (box_iou(boxes[i], boxes[j]) > thresh){ for(k = 0; k < classes; ++k){ if (probs[i][k] < probs[j][k]) probs[i][k] = 0; diff --git a/src/box.h b/src/box.h index e45dd890..f7ef36fd 100644 --- a/src/box.h +++ b/src/box.h @@ -10,8 +10,9 @@ typedef struct{ } dbox; float box_iou(box a, box b); +float box_rmse(box a, box b); dbox diou(box a, box b); -void do_nms(box *boxes, float **probs, int num_boxes, int classes, float thresh); +void do_nms(box *boxes, float **probs, int total, int classes, float thresh); box decode_box(box b, box anchor); box encode_box(box b, box anchor); diff --git a/src/coco.c b/src/coco.c index d2a108a7..62ae4292 100644 --- a/src/coco.c +++ b/src/coco.c @@ -17,7 +17,7 @@ int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25 void draw_coco(image im, float *pred, int side, char *label) { - int classes = 81; + int classes = 1; int elems = 4+classes; int j; int r, c; @@ -26,10 +26,9 @@ void draw_coco(image im, float *pred, int side, char *label) for(c = 0; c < side; ++c){ j = (r*side + c) * elems; int class = max_index(pred+j, classes); - if (class == 0) continue; if (pred[j+class] > 0.2){ int width = pred[j+class]*5 + 1; - printf("%f %s\n", pred[j+class], coco_classes[class-1]); + printf("%f %s\n", pred[j+class], "object"); //coco_classes[class-1]); float red = get_color(0,class,classes); float green = get_color(1,class,classes); float blue = get_color(2,class,classes); @@ -37,10 +36,10 @@ void draw_coco(image im, float *pred, int side, char *label) j += classes; box predict = {pred[j+0], pred[j+1], pred[j+2], pred[j+3]}; - box anchor = {(c+.5)/side, (r+.5)/side, .5, .5}; - box decode = decode_box(predict, anchor); + predict.x = (predict.x+c)/side; + predict.y = (predict.y+r)/side; - draw_bbox(im, decode, width, red, green, blue); + draw_bbox(im, predict, width, red, green, blue); } } } @@ -49,7 +48,8 @@ void draw_coco(image im, float *pred, int side, char *label) void train_coco(char *cfgfile, char *weightfile) { - char *train_images = "/home/pjreddie/data/coco/train.txt"; + //char *train_images = "/home/pjreddie/data/coco/train.txt"; + char *train_images = "/home/pjreddie/data/voc/test/train.txt"; char *backup_directory = "/home/pjreddie/backup/"; srand(time(0)); data_seed = time(0); @@ -65,8 +65,11 @@ void train_coco(char *cfgfile, char *weightfile) int i = net.seen/imgs; data train, buffer; - int classes = 81; - int side = 7; + + layer l = net.layers[net.n - 1]; + + int side = l.side; + int classes = l.classes; list *plist = get_paths(train_images); int N = plist->size; @@ -95,9 +98,9 @@ void train_coco(char *cfgfile, char *weightfile) printf("Loaded: %lf seconds\n", sec(clock()-time)); /* - image im = float_to_image(net.w, net.h, 3, train.X.vals[114]); + image im = float_to_image(net.w, net.h, 3, train.X.vals[113]); image copy = copy_image(im); - draw_coco(copy, train.y.vals[114], 7, "truth"); + draw_coco(copy, train.y.vals[113], 7, "truth"); cvWaitKey(0); free_image(copy); */ @@ -109,12 +112,19 @@ void train_coco(char *cfgfile, char *weightfile) avg_loss = avg_loss*.9 + loss*.1; printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); - if((i-1)*imgs <= 80*N && i*imgs > N*80){ - fprintf(stderr, "First stage done.\n"); + if((i-1)*imgs <= N && i*imgs > N){ + fprintf(stderr, "First stage done\n"); + net.learning_rate *= 10; char buff[256]; sprintf(buff, "%s/%s_first_stage.weights", backup_directory, base); save_weights(net, buff); - return; + } + + if((i-1)*imgs <= 80*N && i*imgs > N*80){ + fprintf(stderr, "Second stage done.\n"); + char buff[256]; + sprintf(buff, "%s/%s_second_stage.weights", backup_directory, base); + save_weights(net, buff); } if(i%1000==0){ char buff[256]; @@ -128,25 +138,52 @@ void train_coco(char *cfgfile, char *weightfile) save_weights(net, buff); } -void convert_cocos(float *predictions, int classes, int objectness, int background, int num_boxes, int w, int h, float thresh, float **probs, box *boxes) +void get_probs(float *predictions, int total, int classes, int inc, float **probs) { int i,j; - int per_box = 4+classes+(background || objectness); - for (i = 0; i < num_boxes*num_boxes; ++i){ - float scale = 1; - if(objectness) scale = 1-predictions[i*per_box]; - int offset = i*per_box+(background||objectness); + for (i = 0; i < total; ++i){ + int index = i*inc; + float scale = predictions[index]; + probs[i][0] = scale; for(j = 0; j < classes; ++j){ - float prob = scale*predictions[offset+j]; + probs[i][j] = scale*predictions[index+j+1]; + } + } +} +void get_boxes(float *predictions, int n, int num_boxes, int per_box, box *boxes) +{ + int i,j; + for (i = 0; i < num_boxes*num_boxes; ++i){ + for(j = 0; j < n; ++j){ + int index = i*n+j; + int offset = index*per_box; + int row = i / num_boxes; + int col = i % num_boxes; + boxes[index].x = (predictions[offset + 0] + col) / num_boxes; + boxes[index].y = (predictions[offset + 1] + row) / num_boxes; + boxes[index].w = predictions[offset + 2]; + boxes[index].h = predictions[offset + 3]; + } + } +} + +void convert_cocos(float *predictions, int classes, int num_boxes, int num, int w, int h, float thresh, float **probs, box *boxes) +{ + int i,j; + int per_box = 4+classes; + for (i = 0; i < num_boxes*num_boxes*num; ++i){ + int offset = i*per_box; + for(j = 0; j < classes; ++j){ + float prob = predictions[offset+j]; probs[i][j] = (prob > thresh) ? prob : 0; } int row = i / num_boxes; int col = i % num_boxes; offset += classes; - boxes[i].x = (predictions[offset + 0] + col) / num_boxes * w; - boxes[i].y = (predictions[offset + 1] + row) / num_boxes * h; - boxes[i].w = pow(predictions[offset + 2], 2) * w; - boxes[i].h = pow(predictions[offset + 3], 2) * h; + boxes[i].x = (predictions[offset + 0] + col) / num_boxes; + boxes[i].y = (predictions[offset + 1] + row) / num_boxes; + boxes[i].w = predictions[offset + 2]; + boxes[i].h = predictions[offset + 3]; } } @@ -181,6 +218,179 @@ int get_coco_image_id(char *filename) return atoi(p+1); } +void validate_recall(char *cfgfile, char *weightfile) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + srand(time(0)); + + char *val_images = "/home/pjreddie/data/voc/test/2007_test.txt"; + list *plist = get_paths(val_images); + char **paths = (char **)list_to_array(plist); + + layer l = net.layers[net.n - 1]; + + int num_boxes = l.side; + int num = l.n; + int classes = l.classes; + + int j; + + box *boxes = calloc(num_boxes*num_boxes*num, sizeof(box)); + float **probs = calloc(num_boxes*num_boxes*num, sizeof(float *)); + for(j = 0; j < num_boxes*num_boxes*num; ++j) probs[j] = calloc(classes+1, sizeof(float *)); + + int N = plist->size; + int i=0; + int k; + + float iou_thresh = .5; + float thresh = .1; + int total = 0; + int correct = 0; + float avg_iou = 0; + int nms = 0; + int proposals = 0; + + for (i = 0; i < N; ++i) { + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image resized = resize_image(orig, net.w, net.h); + + float *X = resized.data; + float *predictions = network_predict(net, X); + get_boxes(predictions+1+classes, num, num_boxes, 5+classes, boxes); + get_probs(predictions, num*num_boxes*num_boxes, classes, 5+classes, probs); + if (nms) do_nms(boxes, probs, num*num_boxes*num_boxes, (classes>0) ? classes : 1, iou_thresh); + + char *labelpath = find_replace(path, "images", "labels"); + labelpath = find_replace(labelpath, "JPEGImages", "labels"); + labelpath = find_replace(labelpath, ".jpg", ".txt"); + labelpath = find_replace(labelpath, ".JPEG", ".txt"); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < num_boxes*num_boxes*num; ++k){ + if(probs[k][0] > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < num_boxes*num_boxes*num; ++k){ + float iou = box_iou(boxes[k], t); + if(probs[k][0] > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + free(truth); + free_image(orig); + free_image(resized); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + } +} + +void extract_boxes(char *cfgfile, char *weightfile) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + srand(time(0)); + + char *val_images = "/home/pjreddie/data/voc/test/train.txt"; + list *plist = get_paths(val_images); + char **paths = (char **)list_to_array(plist); + + layer l = net.layers[net.n - 1]; + + int num_boxes = l.side; + int num = l.n; + int classes = l.classes; + + int j; + + box *boxes = calloc(num_boxes*num_boxes*num, sizeof(box)); + float **probs = calloc(num_boxes*num_boxes*num, sizeof(float *)); + for(j = 0; j < num_boxes*num_boxes*num; ++j) probs[j] = calloc(classes+1, sizeof(float *)); + + int N = plist->size; + int i=0; + int k; + + int count = 0; + float iou_thresh = .1; + + for (i = 0; i < N; ++i) { + fprintf(stderr, "%5d %5d\n", i, count); + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image resized = resize_image(orig, net.w, net.h); + + float *X = resized.data; + float *predictions = network_predict(net, X); + get_boxes(predictions+1+classes, num, num_boxes, 5+classes, boxes); + get_probs(predictions, num*num_boxes*num_boxes, classes, 5+classes, probs); + + char *labelpath = find_replace(path, "images", "labels"); + labelpath = find_replace(labelpath, "JPEGImages", "labels"); + labelpath = find_replace(labelpath, ".jpg", ".txt"); + labelpath = find_replace(labelpath, ".JPEG", ".txt"); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + FILE *label = stdin; + for(k = 0; k < num_boxes*num_boxes*num; ++k){ + int overlaps = 0; + for (j = 0; j < num_labels; ++j) { + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float iou = box_iou(boxes[k], t); + if (iou > iou_thresh){ + if (!overlaps) { + char buff[256]; + sprintf(buff, "/home/pjreddie/extracted/labels/%d.txt", count); + label = fopen(buff, "w"); + overlaps = 1; + } + fprintf(label, "%d %f\n", truth[j].id, iou); + } + } + if (overlaps) { + char buff[256]; + sprintf(buff, "/home/pjreddie/extracted/imgs/%d", count++); + int dx = (boxes[k].x - boxes[k].w/2) * orig.w; + int dy = (boxes[k].y - boxes[k].h/2) * orig.h; + int w = boxes[k].w * orig.w; + int h = boxes[k].h * orig.h; + image cropped = crop_image(orig, dx, dy, w, h); + image sized = resize_image(cropped, 224, 224); + #ifdef OPENCV + save_image_jpg(sized, buff); + #endif + free_image(sized); + free_image(cropped); + fclose(label); + } + } + free(truth); + free_image(orig); + free_image(resized); + } +} + void validate_coco(char *cfgfile, char *weightfile) { network net = parse_network_cfg(cfgfile); @@ -188,7 +398,6 @@ void validate_coco(char *cfgfile, char *weightfile) load_weights(&net, weightfile); } set_batch_network(&net, 1); - detection_layer layer = get_network_detection_layer(net); fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); srand(time(0)); @@ -196,10 +405,9 @@ void validate_coco(char *cfgfile, char *weightfile) list *plist = get_paths("data/coco_val_5k.list"); char **paths = (char **)list_to_array(plist); - int classes = layer.classes; - int objectness = layer.objectness; - int background = layer.background; - int num_boxes = sqrt(get_detection_layer_locations(layer)); + int num_boxes = 9; + int num = 4; + int classes = 1; int j; char buff[1024]; @@ -207,9 +415,9 @@ void validate_coco(char *cfgfile, char *weightfile) FILE *fp = fopen(buff, "w"); fprintf(fp, "[\n"); - box *boxes = calloc(num_boxes*num_boxes, sizeof(box)); - float **probs = calloc(num_boxes*num_boxes, sizeof(float *)); - for(j = 0; j < num_boxes*num_boxes; ++j) probs[j] = calloc(classes, sizeof(float *)); + box *boxes = calloc(num_boxes*num_boxes*num, sizeof(box)); + float **probs = calloc(num_boxes*num_boxes*num, sizeof(float *)); + for(j = 0; j < num_boxes*num_boxes*num; ++j) probs[j] = calloc(classes, sizeof(float *)); int m = plist->size; int i=0; @@ -257,7 +465,7 @@ void validate_coco(char *cfgfile, char *weightfile) float *predictions = network_predict(net, X); int w = val[t].w; int h = val[t].h; - convert_cocos(predictions, classes, objectness, background, num_boxes, w, h, thresh, probs, boxes); + convert_cocos(predictions, classes, num_boxes, num, w, h, thresh, probs, boxes); if (nms) do_nms(boxes, probs, num_boxes, classes, iou_thresh); print_cocos(fp, image_id, boxes, probs, num_boxes, classes, w, h); free_image(val[t]); @@ -319,5 +527,6 @@ void run_coco(int argc, char **argv) char *filename = (argc > 5) ? argv[5]: 0; if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename); else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); - else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); } diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 7dcf5a43..6e3f38bb 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -122,9 +122,9 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) cuda_free(l->delta_gpu); cuda_free(l->output_gpu); - l->col_image_gpu = cuda_make_array(0, out_h*out_w*l->size*l->size*l->c); - l->delta_gpu = cuda_make_array(0, l->batch*out_h*out_w*l->n); - l->output_gpu = cuda_make_array(0, l->batch*out_h*out_w*l->n); + l->col_image_gpu = cuda_make_array(l->col_image, out_h*out_w*l->size*l->size*l->c); + l->delta_gpu = cuda_make_array(l->delta, l->batch*out_h*out_w*l->n); + l->output_gpu = cuda_make_array(l->output, l->batch*out_h*out_w*l->n); #endif } @@ -261,7 +261,7 @@ image *get_filters(convolutional_layer l) int i; for(i = 0; i < l.n; ++i){ filters[i] = copy_image(get_convolutional_filter(l, i)); - normalize_image(filters[i]); + //normalize_image(filters[i]); } return filters; } diff --git a/src/crop_layer.c b/src/crop_layer.c index d9950d6c..7b340841 100644 --- a/src/crop_layer.c +++ b/src/crop_layer.c @@ -33,7 +33,7 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int l.output = calloc(crop_width*crop_height * c*batch, sizeof(float)); #ifdef GPU l.output_gpu = cuda_make_array(l.output, crop_width*crop_height*c*batch); - l.rand_gpu = cuda_make_array(0, l.batch*8); + l.rand_gpu = cuda_make_array(0, l.batch*8); #endif return l; } diff --git a/src/cuda.c b/src/cuda.c index 1b914a57..e95feff5 100644 --- a/src/cuda.c +++ b/src/cuda.c @@ -12,6 +12,7 @@ int gpu_index = 0; void check_error(cudaError_t status) { + cudaError_t status2 = cudaGetLastError(); if (status != cudaSuccess) { const char *s = cudaGetErrorString(status); @@ -21,6 +22,15 @@ void check_error(cudaError_t status) snprintf(buffer, 256, "CUDA Error: %s", s); error(buffer); } + if (status2 != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error Prev: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error Prev: %s", s); + error(buffer); + } } dim3 cuda_gridsize(size_t n){ diff --git a/src/darknet.c b/src/darknet.c index 0928f28b..f87afc60 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -90,6 +90,17 @@ void partial(char *cfgfile, char *weightfile, char *outfile, int max) save_weights_upto(net, outfile, max); } +void stacked(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + net.seen = 0; + save_weights_double(net, outfile); +} + #include "convolutional_layer.h" void rescale_net(char *cfgfile, char *weightfile, char *outfile) { @@ -155,7 +166,8 @@ int main(int argc, char **argv) gpu_index = -1; #else if(gpu_index >= 0){ - cudaSetDevice(gpu_index); + cudaError_t status = cudaSetDevice(gpu_index); + check_error(status); } #endif @@ -185,6 +197,8 @@ int main(int argc, char **argv) rescale_net(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "partial")){ partial(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "stacked")){ + stacked(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "visualize")){ visualize(argv[2], (argc > 3) ? argv[3] : 0); } else if (0 == strcmp(argv[1], "imtest")){ diff --git a/src/data.c b/src/data.c index a335e07f..ec2b3046 100644 --- a/src/data.c +++ b/src/data.c @@ -1,6 +1,7 @@ #include "data.h" #include "utils.h" #include "image.h" +#include "cuda.h" #include #include @@ -76,12 +77,6 @@ matrix load_image_paths(char **paths, int n, int w, int h) return X; } -typedef struct{ - int id; - float x,y,w,h; - float left, right, top, bottom; -} box_label; - box_label *read_boxes(char *filename, int *n) { box_label *boxes = calloc(1, sizeof(box_label)); @@ -152,6 +147,7 @@ void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) { char *labelpath = find_replace(path, "images", "labels"); + labelpath = find_replace(labelpath, "JPEGImages", "labels"); labelpath = find_replace(labelpath, ".jpg", ".txt"); labelpath = find_replace(labelpath, ".JPEG", ".txt"); int count = 0; @@ -162,42 +158,30 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int int id; int i; - for(i = 0; i < num_boxes*num_boxes*(4+classes); i += 4+classes){ - truth[i] = 1; - } - - for(i = 0; i < count; ++i){ - x = boxes[i].x; - y = boxes[i].y; - w = boxes[i].w; - h = boxes[i].h; + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; id = boxes[i].id; - if (x <= 0 || x >= 1 || y <= 0 || y >= 1) continue; if (w < .01 || h < .01) continue; int col = (int)(x*num_boxes); int row = (int)(y*num_boxes); - float xa = (col+.5)/num_boxes; - float ya = (row+.5)/num_boxes; - float wa = .5; - float ha = .5; + x = x*num_boxes - col; + y = y*num_boxes - row; - float tx = (x - xa) / wa; - float ty = (y - ya) / ha; - float tw = log2(w/wa); - float th = log2(h/ha); - - int index = (col+row*num_boxes)*(4+classes); - if(!truth[index]) continue; - truth[index] = 0; - truth[index+id+1] = 1; + int index = (col+row*num_boxes)*(5+classes); + if (truth[index]) continue; + truth[index++] = 1; + if (classes) truth[index+id] = 1; index += classes; - truth[index++] = tx; - truth[index++] = ty; - truth[index++] = tw; - truth[index++] = th; + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; } free(boxes); } @@ -375,7 +359,7 @@ void free_data(data d) } } -data load_data_region(int n, char **paths, int m, int classes, int w, int h, int num_boxes) +data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes) { char **random_paths = get_random_paths(paths, n, m); int i; @@ -386,7 +370,7 @@ data load_data_region(int n, char **paths, int m, int classes, int w, int h, int d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.cols = h*w*3; - int k = num_boxes*num_boxes*(4+classes); + int k = size*size*(5+classes); d.y = make_matrix(n, k); for(i = 0; i < n; ++i){ image orig = load_image_color(random_paths[i], 0, 0); @@ -418,7 +402,7 @@ data load_data_region(int n, char **paths, int m, int classes, int w, int h, int if(flip) flip_image(sized); d.X.vals[i] = sized.data; - fill_truth_region(random_paths[i], d.y.vals[i], classes, num_boxes, flip, dx, dy, 1./sx, 1./sy); + fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); free_image(orig); free_image(cropped); @@ -427,6 +411,37 @@ data load_data_region(int n, char **paths, int m, int classes, int w, int h, int return d; } +data load_data_compare(int n, char **paths, int m, int classes, int w, int h) +{ + char **random_paths = get_random_paths(paths, 2*n, m); + int i; + data d; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*6; + + int k = 2*(classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image im1 = load_image_color(random_paths[i*2], w, h); + image im2 = load_image_color(random_paths[i*2+1], w, h); + + d.X.vals[i] = calloc(d.X.cols, sizeof(float)); + memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); + memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); + + //char *imlabel1 = find_replace(random_paths[i*2], "imgs", "labels"); + //char *imlabel2 = find_replace(random_paths[i*2+1], "imgs", "labels"); + + free_image(im1); + free_image(im2); + } + free(random_paths); + return d; +} + data load_data_detection(int n, char **paths, int m, int classes, int w, int h, int num_boxes, int background) { char **random_paths = get_random_paths(paths, n, m); @@ -488,6 +503,12 @@ data load_data_detection(int n, char **paths, int m, int classes, int w, int h, void *load_thread(void *ptr) { + + #ifdef GPU + cudaError_t status = cudaSetDevice(gpu_index); + check_error(status); + #endif + printf("Loading data: %d\n", rand_r(&data_seed)); load_args a = *(struct load_args*)ptr; if (a.type == CLASSIFICATION_DATA){ @@ -495,7 +516,7 @@ void *load_thread(void *ptr) } else if (a.type == DETECTION_DATA){ *a.d = load_data_detection(a.n, a.paths, a.m, a.classes, a.w, a.h, a.num_boxes, a.background); } else if (a.type == REGION_DATA){ - *a.d = load_data_region(a.n, a.paths, a.m, a.classes, a.w, a.h, a.num_boxes); + *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes); } else if (a.type == IMAGE_DATA){ *(a.im) = load_image_color(a.path, 0, 0); *(a.resized) = resize_image(*(a.im), a.w, a.h); diff --git a/src/data.h b/src/data.h index f71e04a5..7c425ba5 100644 --- a/src/data.h +++ b/src/data.h @@ -35,7 +35,6 @@ typedef struct load_args{ int n; int m; char **labels; - int k; int h; int w; int nh; @@ -49,6 +48,12 @@ typedef struct load_args{ data_type type; } load_args; +typedef struct{ + int id; + float x,y,w,h; + float left, right, top, bottom; +} box_label; + void free_data(data d); pthread_t load_data_in_thread(load_args args); @@ -59,6 +64,7 @@ data load_data_captcha_encode(char **paths, int n, int m, int w, int h); data load_data(char **paths, int n, int m, char **labels, int k, int w, int h); data load_data_detection(int n, char **paths, int m, int classes, int w, int h, int num_boxes, int background); +box_label *read_boxes(char *filename, int *n); data load_cifar10_data(char *filename); data load_all_cifar10(); diff --git a/src/detection_layer.c b/src/detection_layer.c index f83e2e47..80b606b5 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -39,8 +39,8 @@ detection_layer make_detection_layer(int batch, int inputs, int classes, int coo l.output = calloc(batch*outputs, sizeof(float)); l.delta = calloc(batch*outputs, sizeof(float)); #ifdef GPU - l.output_gpu = cuda_make_array(0, batch*outputs); - l.delta_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = cuda_make_array(l.output, batch*outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*outputs); #endif fprintf(stderr, "Detection Layer\n"); diff --git a/src/image.c b/src/image.c index 8669294c..fa0bcebc 100644 --- a/src/image.c +++ b/src/image.c @@ -271,31 +271,27 @@ void show_image_cv(image p, char *name) if(!success) fprintf(stderr, "Failed to write image %s\n", buff); } - /* - void save_image_cv(image p, char *name) - { - int x,y,k; - image copy = copy_image(p); - //normalize_image(copy); +#ifdef OPENCV + void save_image_jpg(image p, char *name) + { + int x,y,k; - char buff[256]; - //sprintf(buff, "%s (%d)", name, windows); - sprintf(buff, "%s.png", name); + char buff[256]; + sprintf(buff, "%s.jpg", name); - IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c); - int step = disp->widthStep; - for(y = 0; y < p.h; ++y){ - for(x = 0; x < p.w; ++x){ - for(k= 0; k < p.c; ++k){ - disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255); + IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c); + int step = disp->widthStep; + for(y = 0; y < p.h; ++y){ + for(x = 0; x < p.w; ++x){ + for(k= 0; k < p.c; ++k){ + disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(p,x,y,k)*255); + } + } + } + cvSaveImage(buff, disp,0); + cvReleaseImage(&disp); } - } - } - free_image(copy); - cvSaveImage(buff, disp,0); - cvReleaseImage(&disp); - } - */ + #endif void show_image_layers(image p, char *name) { @@ -868,6 +864,7 @@ void show_image_cv(image p, char *name) void show_images(image *ims, int n, char *window) { image m = collapse_images_vert(ims, n); + /* int w = 448; int h = ((float)m.h/m.w) * 448; if(h > 896){ @@ -875,6 +872,9 @@ void show_image_cv(image p, char *name) w = ((float)m.w/m.h) * 896; } image sized = resize_image(m, w, h); + */ + normalize_image(m); + image sized = resize_image(m, m.w, m.h); save_image(sized, window); show_image(sized, window); free_image(sized); diff --git a/src/image.h b/src/image.h index f8577cd7..27dc62a7 100644 --- a/src/image.h +++ b/src/image.h @@ -47,6 +47,10 @@ void show_images(image *ims, int n, char *window); void show_image_layers(image p, char *name); void show_image_collapsed(image p, char *name); +#ifdef OPENCV +void save_image_jpg(image p, char *name); +#endif + void print_image(image m); image make_image(int w, int h, int c); diff --git a/src/imagenet.c b/src/imagenet.c index fb573071..5d794835 100644 --- a/src/imagenet.c +++ b/src/imagenet.c @@ -21,11 +21,11 @@ void train_imagenet(char *cfgfile, char *weightfile) printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); //net.seen=0; int imgs = 1024; - int i = net.seen/imgs; char **labels = get_labels("data/inet.labels.list"); list *plist = get_paths("/data/imagenet/cls.train.list"); char **paths = (char **)list_to_array(plist); printf("%d\n", plist->size); + int N = plist->size; clock_t time; pthread_t load_thread; data train; @@ -37,14 +37,14 @@ void train_imagenet(char *cfgfile, char *weightfile) args.paths = paths; args.classes = 1000; args.n = imgs; - args.m = plist->size; + args.m = N; args.labels = labels; args.d = &buffer; args.type = CLASSIFICATION_DATA; load_thread = load_data_in_thread(args); + int epoch = net.seen/N; while(1){ - ++i; time=clock(); pthread_join(load_thread, 0); train = buffer; @@ -62,15 +62,23 @@ void train_imagenet(char *cfgfile, char *weightfile) net.seen += imgs; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); + printf("%.3f: %f, %f avg, %lf seconds, %d images\n", (float)net.seen/N, loss, avg_loss, sec(clock()-time), net.seen); free_data(train); - if((i % 30000) == 0) net.learning_rate *= .1; - if(i%1000==0){ + if(net.seen/N > epoch){ + epoch = net.seen/N; char buff[256]; - sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; } } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)labels, 1000); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); } void validate_imagenet(char *filename, char *weightfile) diff --git a/src/layer.c b/src/layer.c new file mode 100644 index 00000000..557aa3b4 --- /dev/null +++ b/src/layer.c @@ -0,0 +1,46 @@ +#include "layer.h" +#include "cuda.h" +#include + +void free_layer(layer l) +{ + if(l.type == DROPOUT){ + if(l.rand) free(l.rand); +#ifdef GPU + if(l.rand_gpu) cuda_free(l.rand_gpu); +#endif + return; + } + if(l.indexes) free(l.indexes); + if(l.rand) free(l.rand); + if(l.cost) free(l.cost); + if(l.filters) free(l.filters); + if(l.filter_updates) free(l.filter_updates); + if(l.biases) free(l.biases); + if(l.bias_updates) free(l.bias_updates); + if(l.weights) free(l.weights); + if(l.weight_updates) free(l.weight_updates); + if(l.col_image) free(l.col_image); + if(l.input_layers) free(l.input_layers); + if(l.input_sizes) free(l.input_sizes); + if(l.delta) free(l.delta); + if(l.output) free(l.output); + if(l.squared) free(l.squared); + if(l.norms) free(l.norms); + +#ifdef GPU + if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); + if(l.filters_gpu) cuda_free(l.filters_gpu); + if(l.filter_updates_gpu) cuda_free(l.filter_updates_gpu); + if(l.col_image_gpu) cuda_free(l.col_image_gpu); + if(l.weights_gpu) cuda_free(l.weights_gpu); + if(l.biases_gpu) cuda_free(l.biases_gpu); + if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); + if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); + if(l.output_gpu) cuda_free(l.output_gpu); + if(l.delta_gpu) cuda_free(l.delta_gpu); + if(l.rand_gpu) cuda_free(l.rand_gpu); + if(l.squared_gpu) cuda_free(l.squared_gpu); + if(l.norms_gpu) cuda_free(l.norms_gpu); +#endif +} diff --git a/src/layer.h b/src/layer.h index 4cd9f288..77d7f089 100644 --- a/src/layer.h +++ b/src/layer.h @@ -35,6 +35,7 @@ typedef struct { int n; int groups; int size; + int side; int stride; int pad; int crop_width; @@ -60,6 +61,7 @@ typedef struct { float probability; float scale; + int *indexes; float *rand; float *cost; @@ -101,4 +103,6 @@ typedef struct { #endif } layer; +void free_layer(layer); + #endif diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index ef06175c..20176277 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -66,8 +66,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h) cuda_free(l->output_gpu); cuda_free(l->delta_gpu); l->indexes_gpu = cuda_make_int_array(output_size); - l->output_gpu = cuda_make_array(0, output_size); - l->delta_gpu = cuda_make_array(0, output_size); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); #endif } diff --git a/src/network.c b/src/network.c index de3e569f..70bcb584 100644 --- a/src/network.c +++ b/src/network.c @@ -519,4 +519,17 @@ float network_accuracy_multi(network net, data d, int n) return acc; } - +void free_network(network net) +{ + int i; + for(i = 0; i < net.n; ++i){ + free_layer(net.layers[i]); + } + free(net.layers); + #ifdef GPU + if(*net.input_gpu) cuda_free(*net.input_gpu); + if(*net.truth_gpu) cuda_free(*net.truth_gpu); + if(net.input_gpu) free(net.input_gpu); + if(net.truth_gpu) free(net.truth_gpu); + #endif +} diff --git a/src/network.h b/src/network.h index b684d33a..1d960c07 100644 --- a/src/network.h +++ b/src/network.h @@ -38,6 +38,7 @@ void forward_network_gpu(network net, network_state state); void backward_network_gpu(network net, network_state state); #endif +void free_network(network net); void compare_networks(network n1, network n2, data d); char *get_layer_string(LAYER_TYPE a); diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 593de0ae..a73ddd9c 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -1,6 +1,7 @@ extern "C" { #include #include +#include #include "network.h" #include "image.h" diff --git a/src/nightmare.c b/src/nightmare.c index ba69e6bc..0eb3ca1d 100644 --- a/src/nightmare.c +++ b/src/nightmare.c @@ -49,7 +49,7 @@ void optimize_picture(network *net, image orig, int max_layer, float scale, floa #ifdef GPU state.input = cuda_make_array(im.data, im.w*im.h*im.c); - state.delta = cuda_make_array(0, im.w*im.h*im.c); + state.delta = cuda_make_array(im.data, im.w*im.h*im.c); forward_network_gpu(*net, state); copy_ongpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1); diff --git a/src/normalization_layer.c b/src/normalization_layer.c index 587ece78..0551337a 100644 --- a/src/normalization_layer.c +++ b/src/normalization_layer.c @@ -22,10 +22,10 @@ layer make_normalization_layer(int batch, int w, int h, int c, int size, float a layer.inputs = w*h*c; layer.outputs = layer.inputs; #ifdef GPU - layer.output_gpu = cuda_make_array(0, h * w * c * batch); - layer.delta_gpu = cuda_make_array(0, h * w * c * batch); - layer.squared_gpu = cuda_make_array(0, h * w * c * batch); - layer.norms_gpu = cuda_make_array(0, h * w * c * batch); + layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); + layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); + layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); + layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); #endif return layer; } @@ -49,10 +49,10 @@ void resize_normalization_layer(layer *layer, int w, int h) cuda_free(layer->delta_gpu); cuda_free(layer->squared_gpu); cuda_free(layer->norms_gpu); - layer->output_gpu = cuda_make_array(0, h * w * c * batch); - layer->delta_gpu = cuda_make_array(0, h * w * c * batch); - layer->squared_gpu = cuda_make_array(0, h * w * c * batch); - layer->norms_gpu = cuda_make_array(0, h * w * c * batch); + layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); + layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); + layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); + layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); #endif } diff --git a/src/parser.c b/src/parser.c index 242a83c8..ad324e94 100644 --- a/src/parser.c +++ b/src/parser.c @@ -180,7 +180,8 @@ region_layer parse_region(list *options, size_params params) int classes = option_find_int(options, "classes", 1); int rescore = option_find_int(options, "rescore", 0); int num = option_find_int(options, "num", 1); - region_layer layer = make_region_layer(params.batch, params.inputs, num, classes, coords, rescore); + int side = option_find_int(options, "side", 7); + region_layer layer = make_region_layer(params.batch, params.inputs, num, side, classes, coords, rescore); return layer; } @@ -342,6 +343,7 @@ network parse_network_cfg(char *filename) n = n->next; int count = 0; + free_section(s); while(n){ fprintf(stderr, "%d: ", count); s = (section *)n->val; @@ -521,6 +523,45 @@ list *read_cfg(char *filename) return sections; } +void save_weights_double(network net, char *filename) +{ + fprintf(stderr, "Saving doubled weights to %s\n", filename); + FILE *fp = fopen(filename, "w"); + if(!fp) file_error(filename); + + fwrite(&net.learning_rate, sizeof(float), 1, fp); + fwrite(&net.momentum, sizeof(float), 1, fp); + fwrite(&net.decay, sizeof(float), 1, fp); + fwrite(&net.seen, sizeof(int), 1, fp); + + int i,j,k; + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.type == CONVOLUTIONAL){ +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + float zero = 0; + fwrite(l.biases, sizeof(float), l.n, fp); + fwrite(l.biases, sizeof(float), l.n, fp); + + for (j = 0; j < l.n; ++j){ + int index = j*l.c*l.size*l.size; + fwrite(l.filters+index, sizeof(float), l.c*l.size*l.size, fp); + for (k = 0; k < l.c*l.size*l.size; ++k) fwrite(&zero, sizeof(float), 1, fp); + } + for (j = 0; j < l.n; ++j){ + int index = j*l.c*l.size*l.size; + for (k = 0; k < l.c*l.size*l.size; ++k) fwrite(&zero, sizeof(float), 1, fp); + fwrite(l.filters+index, sizeof(float), l.c*l.size*l.size, fp); + } + } + } + fclose(fp); +} + void save_weights_upto(network net, char *filename, int cutoff) { fprintf(stderr, "Saving weights to %s\n", filename); diff --git a/src/parser.h b/src/parser.h index fe9e5c47..6cff4fb5 100644 --- a/src/parser.h +++ b/src/parser.h @@ -6,6 +6,7 @@ network parse_network_cfg(char *filename); void save_network(network net, char *filename); void save_weights(network net, char *filename); void save_weights_upto(network net, char *filename, int cutoff); +void save_weights_double(network net, char *filename); void load_weights(network *net, char *filename); void load_weights_upto(network *net, char *filename, int cutoff); diff --git a/src/region_layer.c b/src/region_layer.c index 7c34b5cc..dcdcfadc 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -6,15 +6,11 @@ #include "cuda.h" #include "utils.h" #include +#include #include #include -int get_region_layer_locations(region_layer l) -{ - return l.inputs / (l.classes+l.coords); -} - -region_layer make_region_layer(int batch, int inputs, int n, int classes, int coords, int rescore) +region_layer make_region_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) { region_layer l = {0}; l.type = REGION; @@ -25,15 +21,17 @@ region_layer make_region_layer(int batch, int inputs, int n, int classes, int co l.classes = classes; l.coords = coords; l.rescore = rescore; + l.side = side; + assert(side*side*l.coords*l.n == inputs); l.cost = calloc(1, sizeof(float)); - int outputs = inputs; + int outputs = l.n*5*side*side; l.outputs = outputs; l.output = calloc(batch*outputs, sizeof(float)); - l.delta = calloc(batch*outputs, sizeof(float)); + l.delta = calloc(batch*inputs, sizeof(float)); #ifdef GPU - l.output_gpu = cuda_make_array(0, batch*outputs); - l.delta_gpu = cuda_make_array(0, batch*outputs); - #endif + l.output_gpu = cuda_make_array(l.output, batch*outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*inputs); +#endif fprintf(stderr, "Region Layer\n"); srand(0); @@ -43,91 +41,121 @@ region_layer make_region_layer(int batch, int inputs, int n, int classes, int co void forward_region_layer(const region_layer l, network_state state) { - int locations = get_region_layer_locations(l); + int locations = l.side*l.side; int i,j; for(i = 0; i < l.batch*locations; ++i){ - int index = i*(l.classes + l.coords); - int mask = (!state.truth || !state.truth[index]); + for(j = 0; j < l.n; ++j){ + int in_index = i*l.n*l.coords + j*l.coords; + int out_index = i*l.n*5 + j*5; - for(j = 0; j < l.classes; ++j){ - l.output[index+j] = state.input[index+j]; - } + float prob = state.input[in_index+0]; + float x = state.input[in_index+1]; + float y = state.input[in_index+2]; + float w = state.input[in_index+3]; + float h = state.input[in_index+4]; + /* + float min_w = state.input[in_index+5]; + float max_w = state.input[in_index+6]; + float min_h = state.input[in_index+7]; + float max_h = state.input[in_index+8]; + */ - softmax_array(l.output + index, l.classes, l.output + index); - index += l.classes; + l.output[out_index+0] = prob; + l.output[out_index+1] = x; + l.output[out_index+2] = y; + l.output[out_index+3] = w; + l.output[out_index+4] = h; - for(j = 0; j < l.coords; ++j){ - l.output[index+j] = mask*state.input[index+j]; } } if(state.train){ float avg_iou = 0; int count = 0; *(l.cost) = 0; - int size = l.outputs * l.batch; + int size = l.inputs * l.batch; memset(l.delta, 0, size * sizeof(float)); for (i = 0; i < l.batch*locations; ++i) { - int offset = i*(l.classes+l.coords); - int bg = state.truth[offset]; - for (j = offset; j < offset+l.classes; ++j) { - //*(l.cost) += pow(state.truth[j] - l.output[j], 2); - //l.delta[j] = state.truth[j] - l.output[j]; + + for(j = 0; j < l.n; ++j){ + int in_index = i*l.n*l.coords + j*l.coords; + l.delta[in_index+0] = .1*(0-state.input[in_index+0]); } - box anchor = {0,0,.5,.5}; - box truth_code = {state.truth[j+0], state.truth[j+1], state.truth[j+2], state.truth[j+3]}; - box out_code = {l.output[j+0], l.output[j+1], l.output[j+2], l.output[j+3]}; - box out = decode_box(out_code, anchor); - box truth = decode_box(truth_code, anchor); + int truth_index = i*5; + int best_index = -1; + float best_iou = 0; + float best_rmse = 4; + int bg = !state.truth[truth_index]; if(bg) continue; - //printf("Box: %f %f %f %f\n", truth.x, truth.y, truth.w, truth.h); - //printf("Code: %f %f %f %f\n", truth_code.x, truth_code.y, truth_code.w, truth_code.h); - //printf("Pred : %f %f %f %f\n", out.x, out.y, out.w, out.h); - // printf("Pred Code: %f %f %f %f\n", out_code.x, out_code.y, out_code.w, out_code.h); - float iou = box_iou(out, truth); - avg_iou += iou; - ++count; - /* - *(l.cost) += pow((1-iou), 2); - l.delta[j+0] = (state.truth[j+0] - l.output[j+0]); - l.delta[j+1] = (state.truth[j+1] - l.output[j+1]); - l.delta[j+2] = (state.truth[j+2] - l.output[j+2]); - l.delta[j+3] = (state.truth[j+3] - l.output[j+3]); - */ + box truth = {state.truth[truth_index+1], state.truth[truth_index+2], state.truth[truth_index+3], state.truth[truth_index+4]}; + truth.x /= l.side; + truth.y /= l.side; - for (j = offset+l.classes; j < offset+l.classes+l.coords; ++j) { - //*(l.cost) += pow(state.truth[j] - l.output[j], 2); - //l.delta[j] = state.truth[j] - l.output[j]; - float diff = state.truth[j] - l.output[j]; - if (fabs(diff) < 1){ - l.delta[j] = diff; - *(l.cost) += .5*pow(state.truth[j] - l.output[j], 2); - } else { - l.delta[j] = (diff > 0) ? 1 : -1; - *(l.cost) += fabs(diff) - .5; + for(j = 0; j < l.n; ++j){ + int out_index = i*l.n*5 + j*5; + box out = {l.output[out_index+1], l.output[out_index+2], l.output[out_index+3], l.output[out_index+4]}; + + //printf("\n%f %f %f %f %f\n", l.output[out_index+0], out.x, out.y, out.w, out.h); + + out.x /= l.side; + out.y /= l.side; + + float iou = box_iou(out, truth); + float rmse = box_rmse(out, truth); + if(best_iou > 0 || iou > 0){ + if(iou > best_iou){ + best_iou = iou; + best_index = j; + } + }else{ + if(rmse < best_rmse){ + best_rmse = rmse; + best_index = j; + } } - //l.delta[j] = state.truth[j] - l.output[j]; } + printf("%d", best_index); + //int out_index = i*l.n*5 + best_index*5; + //box out = {l.output[out_index+1], l.output[out_index+2], l.output[out_index+3], l.output[out_index+4]}; + int in_index = i*l.n*l.coords + best_index*l.coords; + + l.delta[in_index+0] = (1-state.input[in_index+0]); + l.delta[in_index+1] = state.truth[truth_index+1] - state.input[in_index+1]; + l.delta[in_index+2] = state.truth[truth_index+2] - state.input[in_index+2]; + l.delta[in_index+3] = state.truth[truth_index+3] - state.input[in_index+3]; + l.delta[in_index+4] = state.truth[truth_index+4] - state.input[in_index+4]; + /* + l.delta[in_index+5] = 0 - state.input[in_index+5]; + l.delta[in_index+6] = 1 - state.input[in_index+6]; + l.delta[in_index+7] = 0 - state.input[in_index+7]; + l.delta[in_index+8] = 1 - state.input[in_index+8]; + */ /* - if(l.rescore){ - for (j = offset; j < offset+l.classes; ++j) { - if(state.truth[j]) state.truth[j] = iou; - l.delta[j] = state.truth[j] - l.output[j]; - } - } - */ + float x = state.input[in_index+1]; + float y = state.input[in_index+2]; + float w = state.input[in_index+3]; + float h = state.input[in_index+4]; + float min_w = state.input[in_index+5]; + float max_w = state.input[in_index+6]; + float min_h = state.input[in_index+7]; + float max_h = state.input[in_index+8]; + */ + + + avg_iou += best_iou; + ++count; } - printf("Avg IOU: %f\n", avg_iou/count); + printf("\nAvg IOU: %f %d\n", avg_iou/count, count); } } void backward_region_layer(const region_layer l, network_state state) { - axpy_cpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1); - //copy_cpu(l.batch*l.inputs, l.delta_gpu, 1, state.delta, 1); + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); + //copy_cpu(l.batch*l.inputs, l.delta, 1, state.delta, 1); } #ifdef GPU @@ -147,7 +175,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state) cpu_state.input = in_cpu; forward_region_layer(l, cpu_state); cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); - cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); free(cpu_state.input); if(cpu_state.truth) free(cpu_state.truth); } diff --git a/src/region_layer.h b/src/region_layer.h index 00fbeba3..95f8e919 100644 --- a/src/region_layer.h +++ b/src/region_layer.h @@ -6,7 +6,7 @@ typedef layer region_layer; -region_layer make_region_layer(int batch, int inputs, int n, int classes, int coords, int rescore); +region_layer make_region_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); void forward_region_layer(const region_layer l, network_state state); void backward_region_layer(const region_layer l, network_state state); diff --git a/src/route_layer.c b/src/route_layer.c index 67b606c3..df50b64f 100644 --- a/src/route_layer.c +++ b/src/route_layer.c @@ -21,11 +21,11 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz fprintf(stderr, "\n"); l.outputs = outputs; l.inputs = outputs; - l.delta = calloc(outputs*batch, sizeof(float)); + l.delta = calloc(outputs*batch, sizeof(float)); l.output = calloc(outputs*batch, sizeof(float));; #ifdef GPU - l.delta_gpu = cuda_make_array(0, outputs*batch); - l.output_gpu = cuda_make_array(0, outputs*batch); + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + l.output_gpu = cuda_make_array(l.output, outputs*batch); #endif return l; } diff --git a/src/utils.c b/src/utils.c index ebd1023b..d54e9665 100644 --- a/src/utils.c +++ b/src/utils.c @@ -208,6 +208,13 @@ void strip_char(char *s, char bad) s[len-offset] = '\0'; } +void free_ptrs(void **ptrs, int n) +{ + int i; + for(i = 0; i < n; ++i) free(ptrs[i]); + free(ptrs); +} + char *fgetl(FILE *fp) { if(feof(fp)) return 0; diff --git a/src/utils.h b/src/utils.h index e93cdd01..93327022 100644 --- a/src/utils.h +++ b/src/utils.h @@ -6,6 +6,7 @@ #define SECRET_NUM -1234 +void free_ptrs(void **ptrs, int n); char *basecfg(char *cfgfile); int alphanum_to_int(char c); char int_to_alphanum(int i); diff --git a/src/yolo.c b/src/yolo.c index 13f08240..9bf96dea 100644 --- a/src/yolo.c +++ b/src/yolo.c @@ -138,6 +138,7 @@ void train_yolo(char *cfgfile, char *weightfile) pthread_join(load_thread, 0); free_data(buffer); + args.background = background; load_thread = load_data_in_thread(args); } @@ -283,7 +284,7 @@ void validate_yolo(char *cfgfile, char *weightfile) int w = val[t].w; int h = val[t].h; convert_yolo_detections(predictions, classes, objectness, background, num_boxes, w, h, thresh, probs, boxes); - if (nms) do_nms(boxes, probs, num_boxes, classes, iou_thresh); + if (nms) do_nms(boxes, probs, num_boxes*num_boxes, classes, iou_thresh); print_yolo_detections(fps, id, boxes, probs, num_boxes, classes, w, h); free(id); free_image(val[t]);