From 28d5a4a913b662172c03985e57bbd4ecc5e00c73 Mon Sep 17 00:00:00 2001
From: Joseph Redmon
Date: Sun, 31 May 2015 13:49:50 -0700
Subject: [PATCH] more detection stuff

---
Review notes (placed after the three-dash separator, so they are not part of
the commit message):

 * Line structure restored: one patch record per text line. The flattened
   text did not preserve leading indentation, so indentation inside the hunks
   has been rebuilt with 4-space steps. TODO confirm against the tree, or
   apply with whitespace checks relaxed (git apply --ignore-whitespace).
 * mask_detection(): the thr array is now allocated with sizeof(pthread_t)
   instead of sizeof(data). This is the only code token changed; every hunk
   keeps the line counts stated in its @@ header.
 * mask_detection() does not free val, buf, thr or paths before returning.
   Left unchanged here; worth a follow-up.
 * do_mask() accepts num_boxes but never reads it.
 * get_network_output_layer_gpu() now calls cuda_pull_array() before the
   layer-type dispatch, so the pull runs for every layer type, including the
   ones that previously returned l.output without pulling, and SOFTMAX no
   longer goes through pull_softmax_layer_output().
 * load_data_writing() now loads labels at w/8 x h/8; train_writing() passes
   512x512 and reads the prediction as a 64x64 image, which agrees.

 src/data.c             |  4 +-
 src/detection.c        | 97 +++++++++++++++++++++++++++++++++++++++---
 src/detection_layer.c  |  5 +--
 src/imagenet.c         |  2 +-
 src/network_kernels.cu |  4 +-
 src/writing.c          | 73 +++++++++++++++++++++++++++++++
 6 files changed, 169 insertions(+), 16 deletions(-)
 create mode 100644 src/writing.c

diff --git a/src/data.c b/src/data.c
index ca5f4a63..425d216c 100644
--- a/src/data.c
+++ b/src/data.c
@@ -527,11 +527,11 @@ pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, in
 data load_data_writing(char **paths, int n, int m, int w, int h)
 {
     if(m) paths = get_random_paths(paths, n, m);
-    char **replace_paths = find_replace_paths(paths, n, ".png", "label.png");
+    char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png");
     data d;
     d.shallow = 0;
     d.X = load_image_paths(paths, n, w, h);
-    d.y = load_image_paths_gray(replace_paths, n, w/4, h/4);
+    d.y = load_image_paths_gray(replace_paths, n, w/8, h/8);
     if(m) free(paths);
     int i;
     for(i = 0; i < n; ++i) free(replace_paths[i]);
diff --git a/src/detection.c b/src/detection.c
index c012848c..0dbedb1b 100644
--- a/src/detection.c
+++ b/src/detection.c
@@ -21,7 +21,7 @@ void draw_detection(image im, float *box, int side, char *label)
             //printf("%d\n", j);
             //printf("Prob: %f\n", box[j]);
             int class = max_index(box+j, classes);
-            if(box[j+class] > .4){
+            if(box[j+class] > .05){
                 //int z;
                 //for(z = 0; z < classes; ++z) printf("%f %s\n", box[j+z], class_names[z]);
                 printf("%f %s\n", box[j+class], class_names[class]);
@@ -257,8 +257,8 @@ void train_detection(char *cfgfile, char *weightfile)
     if (imgnet){
         plist = get_paths("/home/pjreddie/data/imagenet/det.train.list");
     }else{
-        plist = get_paths("/home/pjreddie/data/voc/no_2012_val.txt");
-        //plist = get_paths("/home/pjreddie/data/voc/no_2007_test.txt");
+        //plist = get_paths("/home/pjreddie/data/voc/no_2012_val.txt");
+        plist = get_paths("/home/pjreddie/data/voc/no_2007_test.txt");
         //plist = get_paths("/home/pjreddie/data/coco/trainval.txt");
         //plist = get_paths("/home/pjreddie/data/voc/all2007-2012.txt");
     }
@@ -289,7 +289,7 @@ void train_detection(char *cfgfile, char *weightfile)
         if(i == 100){
             net.learning_rate *= 10;
         }
-        if(i%100==0){
+        if(i%1000==0){
             char buff[256];
             sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);
             save_weights(net, buff);
@@ -336,8 +336,8 @@ void validate_detection(char *cfgfile, char *weightfile)
     fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     srand(time(0));
 
-    //list *plist = get_paths("/home/pjreddie/data/voc/test_2007.txt");
-    list *plist = get_paths("/home/pjreddie/data/voc/val_2012.txt");
+    list *plist = get_paths("/home/pjreddie/data/voc/test_2007.txt");
+    //list *plist = get_paths("/home/pjreddie/data/voc/val_2012.txt");
     //list *plist = get_paths("/home/pjreddie/data/voc/test.txt");
     //list *plist = get_paths("/home/pjreddie/data/voc/val.expanded.txt");
     //list *plist = get_paths("/home/pjreddie/data/voc/train.txt");
@@ -388,6 +388,89 @@ void validate_detection(char *cfgfile, char *weightfile)
     }
 }
 
+void do_mask(network net, data d, int offset, int classes, int nuisance, int background, int num_boxes, int per_box)
+{
+    matrix pred = network_predict_data(net, d);
+    int j, k, class;
+    for(j = 0; j < pred.rows; ++j){
+        printf("%d ", offset + j);
+        for(k = 0; k < pred.cols; k += per_box){
+            float scale = 1.;
+            if (nuisance) scale = 1.-pred.vals[j][k];
+            float max_prob = 0;
+            for (class = 0; class < classes; ++class){
+                float prob = scale*pred.vals[j][k+class+background+nuisance];
+                if(prob > max_prob) max_prob = prob;
+            }
+            printf("%f ", max_prob);
+        }
+        printf("\n");
+    }
+    free_matrix(pred);
+}
+
+void mask_detection(char *cfgfile, char *weightfile)
+{
+    network net = parse_network_cfg(cfgfile);
+    if(weightfile){
+        load_weights(&net, weightfile);
+    }
+    detection_layer layer = get_network_detection_layer(net);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    srand(time(0));
+
+    list *plist = get_paths("/home/pjreddie/data/voc/test_2007.txt");
+    //list *plist = get_paths("/home/pjreddie/data/voc/val_2012.txt");
+    //list *plist = get_paths("/home/pjreddie/data/voc/test.txt");
+    //list *plist = get_paths("/home/pjreddie/data/voc/val.expanded.txt");
+    //list *plist = get_paths("/home/pjreddie/data/voc/train.txt");
+    char **paths = (char **)list_to_array(plist);
+
+    int classes = layer.classes;
+    int nuisance = layer.nuisance;
+    int background = (layer.background && !nuisance);
+    int num_boxes = sqrt(get_detection_layer_locations(layer));
+
+    int per_box = 4+classes+background+nuisance;
+    int num_output = num_boxes*num_boxes*per_box;
+
+    int m = plist->size;
+    int i = 0;
+    int splits = 100;
+
+    int nthreads = 4;
+    int t;
+    data *val = calloc(nthreads, sizeof(data));
+    data *buf = calloc(nthreads, sizeof(data));
+    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+    for(t = 0; t < nthreads; ++t){
+        int num = (i+1+t)*m/splits - (i+t)*m/splits;
+        char **part = paths+((i+t)*m/splits);
+        thr[t] = load_data_thread(part, num, 0, 0, num_output, net.w, net.h, &(buf[t]));
+    }
+
+    clock_t time;
+    for(i = nthreads; i <= splits; i += nthreads){
+        time=clock();
+        for(t = 0; t < nthreads; ++t){
+            pthread_join(thr[t], 0);
+            val[t] = buf[t];
+        }
+        for(t = 0; t < nthreads && i < splits; ++t){
+            int num = (i+1+t)*m/splits - (i+t)*m/splits;
+            char **part = paths+((i+t)*m/splits);
+            thr[t] = load_data_thread(part, num, 0, 0, num_output, net.w, net.h, &(buf[t]));
+        }
+
+        fprintf(stderr, "%d: Loaded: %lf seconds\n", i, sec(clock()-time));
+        for(t = 0; t < nthreads; ++t){
+            do_mask(net, val[t], (i-nthreads+t)*m/splits, classes, nuisance, background, num_boxes, per_box);
+            free_data(val[t]);
+        }
+        time=clock();
+    }
+}
+
 void validate_detection_post(char *cfgfile, char *weightfile)
 {
     network net = parse_network_cfg(cfgfile);
@@ -534,6 +617,7 @@ void test_detection(char *cfgfile, char *weightfile)
         printf("%s: Predicted in %f seconds.\n", filename, sec(clock()-time));
         draw_detection(im, predictions, 7, "detections");
         free_image(im);
+        cvWaitKey(0);
     }
 }
 
@@ -551,5 +635,6 @@ void run_detection(int argc, char **argv)
     else if(0==strcmp(argv[2], "teststuff")) train_detection_teststuff(cfg, weights);
     else if(0==strcmp(argv[2], "trainloc")) train_localization(cfg, weights);
     else if(0==strcmp(argv[2], "valid")) validate_detection(cfg, weights);
+    else if(0==strcmp(argv[2], "mask")) mask_detection(cfg, weights);
     else if(0==strcmp(argv[2], "validpost")) validate_detection_post(cfg, weights);
 }
diff --git a/src/detection_layer.c b/src/detection_layer.c
index ae5930fd..fcae7f31 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -372,15 +372,12 @@ void forward_detection_layer(const detection_layer l, network_state state)
             l.delta[j+1] = 4 * (state.truth[j+1] - l.output[j+1]);
             l.delta[j+2] = 4 * (state.truth[j+2] - l.output[j+2]);
             l.delta[j+3] = 4 * (state.truth[j+3] - l.output[j+3]);
-            if(1){
+            if(0){
                 for (j = offset; j < offset+classes; ++j) {
                     if(state.truth[j]) state.truth[j] = iou;
                     l.delta[j] = state.truth[j] - l.output[j];
                 }
             }
-
-            /*
-            */
         }
         printf("Avg IOU: %f\n", avg_iou/count);
     }
diff --git a/src/imagenet.c b/src/imagenet.c
index 2e1b685b..9925a9ab 100644
--- a/src/imagenet.c
+++ b/src/imagenet.c
@@ -32,7 +32,7 @@ void train_imagenet(char *cfgfile, char *weightfile)
         pthread_join(load_thread, 0);
         train = buffer;
 
-/*
+        /*
         image im = float_to_image(256, 256, 3, train.X.vals[114]);
         show_image(im, "training");
         cvWaitKey(0);
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index 2ca2e2db..5e353aee 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -133,20 +133,18 @@ float train_network_datum_gpu(network net, float *x, float *y)
 float *get_network_output_layer_gpu(network net, int i)
 {
     layer l = net.layers[i];
+    cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
     if(l.type == CONVOLUTIONAL){
         return l.output;
     } else if(l.type == DECONVOLUTIONAL){
         return l.output;
     } else if(l.type == CONNECTED){
-        cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
         return l.output;
     } else if(l.type == DETECTION){
-        cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
         return l.output;
     } else if(l.type == MAXPOOL){
         return l.output;
     } else if(l.type == SOFTMAX){
-        pull_softmax_layer_output(l);
         return l.output;
     }
     return 0;
diff --git a/src/writing.c b/src/writing.c
new file mode 100644
index 00000000..1c1684bc
--- /dev/null
+++ b/src/writing.c
@@ -0,0 +1,73 @@
+#include "network.h"
+#include "utils.h"
+#include "parser.h"
+
+void train_writing(char *cfgfile, char *weightfile)
+{
+    data_seed = time(0);
+    srand(time(0));
+    float avg_loss = -1;
+    char *base = basecfg(cfgfile);
+    printf("%s\n", base);
+    network net = parse_network_cfg(cfgfile);
+    if(weightfile){
+        load_weights(&net, weightfile);
+    }
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    int imgs = 1024;
+    int i = net.seen/imgs;
+    list *plist = get_paths("figures.list");
+    char **paths = (char **)list_to_array(plist);
+    printf("%d\n", plist->size);
+    clock_t time;
+    while(1){
+        ++i;
+        time=clock();
+        data train = load_data_writing(paths, imgs, plist->size, 512, 512);
+        float loss = train_network(net, train);
+        #ifdef GPU
+        float *out = get_network_output_gpu(net);
+        #else
+        float *out = get_network_output(net);
+        #endif
+        image pred = float_to_image(64, 64, 1, out);
+        print_image(pred);
+
+/*
+        image im = float_to_image(256, 256, 3, train.X.vals[0]);
+        image lab = float_to_image(64, 64, 1, train.y.vals[0]);
+        image pred = float_to_image(64, 64, 1, out);
+        show_image(im, "image");
+        show_image(lab, "label");
+        print_image(lab);
+        show_image(pred, "pred");
+        cvWaitKey(0);
+        */
+
+        net.seen += imgs;
+        if(avg_loss == -1) avg_loss = loss;
+        avg_loss = avg_loss*.9 + loss*.1;
+        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
+        free_data(train);
+        if((i % 20000) == 0) net.learning_rate *= .1;
+        //if(i%100 == 0 && net.learning_rate > .00001) net.learning_rate *= .97;
+        if(i%1000==0){
+            char buff[256];
+            sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);
+            save_weights(net, buff);
+        }
+    }
+}
+
+void run_writing(int argc, char **argv)
+{
+    if(argc < 4){
+        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
+        return;
+    }
+
+    char *cfg = argv[3];
+    char *weights = (argc > 4) ? argv[4] : 0;
+    if(0==strcmp(argv[2], "train")) train_writing(cfg, weights);
+}
+