diff --git a/Makefile b/Makefile
index 22b40a85..48560f9c 100644
--- a/Makefile
+++ b/Makefile
@@ -57,8 +57,8 @@ CFLAGS+= -DCUDNN
 LDFLAGS+= -lcudnn
 endif
-OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o
-EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o attention.o darknet.o
+OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o
+EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o darknet.o
 ifeq ($(GPU), 1)
 LDFLAGS+= -lstdc++
 OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o
@@ -97,5 +97,5 @@ results:
 .PHONY: clean
 clean:
- rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ)
+ rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/*
diff --git a/cfg/darknet.cfg b/cfg/darknet.cfg
index ccb4629b..ec902830 100644
--- a/cfg/darknet.cfg
+++ b/cfg/darknet.cfg
@@ -1,12 +1,12 @@
 [net]
 # Train
-batch=1
-subdivisions=1
+ batch=128
+ subdivisions=1
 # Test
-# batch=1
-# subdivisions=1
-height=224
-width=224
+#batch=1
+#subdivisions=1
+height=256
+width=256
 channels=3
 momentum=0.9
 decay=0.0005
@@ -88,7 +88,6 @@ activation=leaky
 [maxpool]
 size=2
 stride=2
-padding=1
 [convolutional]
 batch_normalize=1
@@ -110,6 +109,3 @@ activation=leaky
 [softmax]
 groups=1
-[cost]
-type=sse
-
diff --git a/cfg/darknet19.cfg b/cfg/darknet19.cfg
index bf73fb7b..f56a46e2 100644
--- a/cfg/darknet19.cfg
+++ b/cfg/darknet19.cfg
@@ -1,17 +1,31 @@
 [net]
-batch=128
-subdivisions=1
-height=224
-width=224
+# Training
+#batch=128
+#subdivisions=2
+
+# Testing
+ batch=1
+ subdivisions=1
+
+height=256
+width=256
+min_crop=128
+max_crop=448
 channels=3
 momentum=0.9
 decay=0.0005
-max_crop=448
+burn_in=1000
 learning_rate=0.1
 policy=poly
 power=4
-max_batches=1600000
+max_batches=800000
+
+angle=7
+hue=.1
+saturation=.75
+exposure=.75
+aspect=.75
 [convolutional]
 batch_normalize=1
diff --git a/cfg/imagenet22k.dataset b/cfg/imagenet22k.dataset
index 920785d6..e25ef007 100644
--- a/cfg/imagenet22k.dataset
+++ b/cfg/imagenet22k.dataset
@@ -1,6 +1,7 @@
 classes=21842
 train = /data/imagenet/imagenet22k.train.list
 valid = /data/imagenet/imagenet22k.valid.list
+#valid = /data/imagenet/imagenet1k.valid.list
 backup = /home/pjreddie/backup/
 labels = data/imagenet.labels.list
 names = data/imagenet.shortnames.list
diff --git a/cfg/tiny-yolo.cfg b/cfg/tiny-yolo.cfg
index 9a4a184f..37e71356 100644
--- a/cfg/tiny-yolo.cfg
+++ b/cfg/tiny-yolo.cfg
@@ -120,7 +120,7 @@ filters=425
 activation=linear
 [region]
-anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
+anchors = 18.3274,21.6763, 59.9827,66.001, 106.83,175.179, 252.25,112.889, 312.657,293.385
 bias_match=1
 classes=80
 coords=4
diff --git a/cfg/yolo.cfg b/cfg/yolo.cfg
index 088edf81..b8a9f683 100644
--- a/cfg/yolo.cfg
+++ b/cfg/yolo.cfg
@@ -1,10 +1,10 @@
 [net]
 # Testing
-batch=1
-subdivisions=1
+# batch=1
+# subdivisions=1
 # Training
-# batch=64
-# subdivisions=8
+batch=64
+subdivisions=8
 width=608
 height=608
 channels=3
@@ -239,7 +239,7 @@ activation=linear
 [region]
-anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
+anchors = 18.3274,21.6763, 59.9827,66.001, 106.83,175.179, 252.25,112.889, 312.657,293.385
 bias_match=1
 classes=80
 coords=4
diff --git a/examples/classifier.c b/examples/classifier.c
index 8843e548..e707ebc3 100644
--- a/examples/classifier.c
+++ b/examples/classifier.c
@@ -44,11 +44,17 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
 list *options = read_data_cfg(datacfg);
 char *backup_directory = option_find_str(options, "backup", "/backup/");
+ int tag = option_find_int_quiet(options, "tag", 0);
 char *label_list = option_find_str(options, "labels", "data/labels.list");
 char *train_list = option_find_str(options, "train", "data/train.list");
+ char *tree = option_find_str(options, "tree", 0);
+ if (tree) net->hierarchy = read_tree(tree);
 int classes = option_find_int(options, "classes", 2);
- char **labels = get_labels(label_list);
+ char **labels;
+ if(!tag){
+ labels = get_labels(label_list);
+ }
 list *plist = get_paths(train_list);
 char **paths = (char **)list_to_array(plist);
 printf("%d\n", plist->size);
@@ -76,7 +82,11 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
 args.n = imgs;
 args.m = N;
 args.labels = labels;
- args.type = CLASSIFICATION_DATA;
+ if (tag){
+ args.type = TAG_DATA;
+ } else {
+ args.type = CLASSIFICATION_DATA;
+ }
 data train;
 data buffer;
@@ -385,15 +395,13 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
 }
 }
 image im = load_image_color(paths[i], 0, 0);
- image resized = resize_min(im, net->w);
- image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h);
+ image crop = center_crop_image(im, net->w, net->h);
 //show_image(im, "orig");
 //show_image(crop, "cropped");
 //cvWaitKey(0);
 float *pred = network_predict(net, crop.data);
 if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1);
- if(resized.data != im.data) free_image(resized);
 free_image(im);
 free_image(crop);
 top_k(pred, classes, topk, indexes);
@@ -403,6 +411,7 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
 if(indexes[j] == class) avg_topk += 1;
 }
+ printf("%s, %d, %f, %f, \n", paths[i], class, pred[0], pred[1]);
 printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
 }
 }
@@ -577,6 +586,8 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
 }
 image im = load_image_color(input, 0, 0);
 image r = letterbox_image(im, net->w, net->h);
+ //image r = resize_min(im, 320);
+ //printf("%d %d\n", r.w, r.h);
 //resize_network(net, r.w, r.h);
 //printf("%d %d\n", r.w, r.h);
@@ -704,6 +715,44 @@ void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int
target_ } } +void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile) +{ + int i,j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + //char *label_list = option_find_str(options, "names", "data/labels.list"); + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(listfile); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + + printf("%s", paths[i]); + for(j = 0; j < classes; ++j){ + printf("\t%g", pred[j]); + } + printf("\n"); + } +} + void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) { @@ -914,6 +963,8 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) { #ifdef OPENCV + char *base = basecfg(cfgfile); + image **alphabet = load_alphabet(); printf("Classifier Demo\n"); network *net = load_network(cfgfile, weightfile, 0); set_batch_network(net, 1); @@ -922,22 +973,33 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind srand(2222222); CvCapture * cap; + int w = 1280; + int h = 720; + if(filename){ cap = cvCaptureFromFile(filename); }else{ cap = cvCaptureFromCAM(cam_index); } + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + int top = option_find_int(options, "top", 1); - char *name_list = option_find_str(options, "names", 0); + char *label_list = option_find_str(options, "labels", 0); + char *name_list = option_find_str(options, "names", label_list); char **names = get_labels(name_list); int *indexes = calloc(top, sizeof(int)); if(!cap) error("Couldn't connect to webcam.\n"); - cvNamedWindow("Classifier", CV_WINDOW_NORMAL); - cvResizeWindow("Classifier", 512, 512); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); float fps = 0; int i; @@ -946,8 +1008,8 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind gettimeofday(&tval_before, NULL); image in = get_image_from_stream(cap); - image in_s = resize_image(in, net->w, net->h); - show_image(in, "Classifier"); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); float *predictions = network_predict(net, in_s.data); if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); @@ -957,11 +1019,24 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind printf("\033[1;1H"); printf("\nFPS:%.0f\n",fps); + int lh = in.h*.03; + int toph = 3*lh; + + float rgb[3] = {1,1,1}; for(i = 0; i < top; ++i){ + printf("%d\n", toph); int index = indexes[i]; printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + + char buff[1024]; + sprintf(buff, "%3.1f%%: %s\n", predictions[index]*100, names[index]); + image label = get_label(alphabet, 
buff, lh); + draw_label(in, toph, lh, label, rgb); + toph += 2*lh; + free_image(label); } + show_image(in, base); free_image(in_s); free_image(in); @@ -998,6 +1073,7 @@ void run_classifier(int argc, char **argv) char *layer_s = (argc > 7) ? argv[7]: 0; int layer = layer_s ? atoi(layer_s) : -1; if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename); else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear); else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); diff --git a/examples/coco.c b/examples/coco.c index 77e04bbf..6d9638c4 100644 --- a/examples/coco.c +++ b/examples/coco.c @@ -94,14 +94,14 @@ void train_coco(char *cfgfile, char *weightfile) save_weights(net, buff); } -void print_cocos(FILE *fp, int image_id, box *boxes, float **probs, int num_boxes, int classes, int w, int h) +static void print_cocos(FILE *fp, int image_id, detection *dets, int num_boxes, int classes, int w, int h) { int i, j; for(i = 0; i < num_boxes; ++i){ - float xmin = boxes[i].x - boxes[i].w/2.; - float xmax = boxes[i].x + boxes[i].w/2.; - float ymin = boxes[i].y - boxes[i].h/2.; - float ymax = boxes[i].y + boxes[i].h/2.; + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; if (xmin < 0) xmin = 0; if (ymin < 0) ymin = 0; @@ -114,7 +114,7 @@ void print_cocos(FILE *fp, int image_id, box *boxes, float **probs, int num_boxe float bh = ymax - ymin; for(j = 0; j < classes; ++j){ - if (probs[i][j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, probs[i][j]); + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); } } } @@ -140,17 +140,13 @@ void validate_coco(char *cfg, char *weights) layer l = net->layers[net->n-1]; int classes = l.classes; - int side = l.side; - int j; char buff[1024]; snprintf(buff, 1024, "%s/coco_results.json", base); FILE *fp = fopen(buff, "w"); fprintf(fp, "[\n"); - box *boxes = calloc(side*side*l.n, sizeof(box)); - float **probs = calloc(side*side*l.n, sizeof(float *)); - for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); + detection *dets = make_network_boxes(net); int m = plist->size; int i=0; @@ -199,9 +195,9 @@ void validate_coco(char *cfg, char *weights) network_predict(net, X); int w = val[t].w; int h = val[t].h; - get_detection_boxes(l, w, h, thresh, probs, boxes, 0); - if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, iou_thresh); - print_cocos(fp, image_id, boxes, probs, side*side*l.n, classes, w, h); + fill_network_boxes(net, w, h, thresh, 0, 0, 0, dets); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_cocos(fp, image_id, dets, l.side*l.side*l.n, classes, w, h); free_image(val[t]); free_image(val_resized[t]); } @@ -235,9 +231,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile) snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); fps[j] = fopen(buff, "w"); } - box *boxes = calloc(side*side*l.n, sizeof(box)); - float **probs = calloc(side*side*l.n, 
sizeof(float *)); - for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); + detection *dets = make_network_boxes(net); int m = plist->size; int i=0; @@ -245,7 +239,6 @@ void validate_coco_recall(char *cfgfile, char *weightfile) float thresh = .001; int nms = 0; float iou_thresh = .5; - float nms_thresh = .5; int total = 0; int correct = 0; @@ -258,8 +251,9 @@ void validate_coco_recall(char *cfgfile, char *weightfile) image sized = resize_image(orig, net->w, net->h); char *id = basecfg(path); network_predict(net, sized.data); - get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1); - if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh); + + fill_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, dets); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); char labelpath[4096]; find_replace(path, "images", "labels", labelpath); @@ -270,7 +264,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile) int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); for(k = 0; k < side*side*l.n; ++k){ - if(probs[k][0] > thresh){ + if(dets[k].objectness > thresh){ ++proposals; } } @@ -279,8 +273,8 @@ void validate_coco_recall(char *cfgfile, char *weightfile) box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; float best_iou = 0; for(k = 0; k < side*side*l.n; ++k){ - float iou = box_iou(boxes[k], t); - if(probs[k][0] > thresh && iou > best_iou){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ best_iou = iou; } } @@ -308,10 +302,7 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) clock_t time; char buff[256]; char *input = buff; - int j; - box *boxes = calloc(l.side*l.side*l.n, sizeof(box)); - float **probs = calloc(l.side*l.side*l.n, sizeof(float *)); - for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); + detection *dets = make_network_boxes(net); while(1){ if(filename){ strncpy(input, filename, 256); @@ -328,9 +319,11 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) time=clock(); network_predict(net, X); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0); - if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms); - draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, 0, coco_classes, alphabet, 80); + + fill_network_boxes(net, 1, 1, thresh, 0, 0, 0, dets); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80); save_image(im, "prediction"); show_image(im, "predictions"); free_image(im); diff --git a/examples/darknet.c b/examples/darknet.c index b89f69ab..9757eb18 100644 --- a/examples/darknet.c +++ b/examples/darknet.c @@ -12,7 +12,6 @@ extern void run_coco(int argc, char **argv); extern void run_captcha(int argc, char **argv); extern void run_nightmare(int argc, char **argv); extern void run_classifier(int argc, char **argv); -extern void run_attention(int argc, char **argv); extern void run_regressor(int argc, char **argv); extern void run_segmenter(int argc, char **argv); extern void run_char_rnn(int argc, char **argv); @@ -189,6 +188,25 @@ void partial(char *cfgfile, char *weightfile, char *outfile, int max) save_weights_upto(net, outfile, max); } +void print_weights(char *cfgfile, char *weightfile, int n) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + layer l = net->layers[n]; + 
int i, j; + //printf("["); + for(i = 0; i < l.n; ++i){ + //printf("["); + for(j = 0; j < l.size*l.size*l.c; ++j){ + //if(j > 0) printf(","); + printf("%g ", l.weights[i*l.size*l.size*l.c + j]); + } + printf("\n"); + //printf("]%s\n", (i == l.n-1)?"":","); + } + //printf("]"); +} + void rescale_net(char *cfgfile, char *weightfile, char *outfile) { gpu_index = -1; @@ -432,8 +450,6 @@ int main(int argc, char **argv) predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); } else if (0 == strcmp(argv[1], "classifier")){ run_classifier(argc, argv); - } else if (0 == strcmp(argv[1], "attention")){ - run_attention(argc, argv); } else if (0 == strcmp(argv[1], "regressor")){ run_regressor(argc, argv); } else if (0 == strcmp(argv[1], "segmenter")){ @@ -470,6 +486,8 @@ int main(int argc, char **argv) oneoff(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "oneoff2")){ oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "print")){ + print_weights(argv[2], argv[3], atoi(argv[4])); } else if (0 == strcmp(argv[1], "partial")){ partial(argv[2], argv[3], argv[4], atoi(argv[5])); } else if (0 == strcmp(argv[1], "average")){ diff --git a/examples/detector.c b/examples/detector.c index 15575331..5bf8ac40 100644 --- a/examples/detector.c +++ b/examples/detector.c @@ -2,6 +2,7 @@ static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) { list *options = read_data_cfg(datacfg); @@ -73,6 +74,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i free_data(train); load_thread = load_data(args); + #pragma omp parallel for for(i = 0; i < ngpus; ++i){ resize_network(nets[i], dim, dim); } @@ -84,28 +86,28 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i load_thread = load_data(args); /* - int k; - for(k = 0; k < l.max_boxes; ++k){ - box b = float_to_box(train.y.vals[10] + 1 + k*5); - if(!b.x) break; - printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); - } - */ + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[10] + 1 + k*5); + if(!b.x) break; + printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); + } + */ /* - int zz; - for(zz = 0; zz < train.X.cols; ++zz){ - image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]); - int k; - for(k = 0; k < l.max_boxes; ++k){ - box b = float_to_box(train.y.vals[zz] + k*5, 1); - printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); - draw_bbox(im, b, 1, 1,0,0); - } - show_image(im, "truth11"); - cvWaitKey(0); - save_image(im, "truth11"); - } - */ + int zz; + for(zz = 0; zz < train.X.cols; ++zz){ + image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]); + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[zz] + k*5, 1); + printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + draw_bbox(im, b, 1, 1,0,0); + } + show_image(im, "truth11"); + cvWaitKey(0); + save_image(im, "truth11"); + } + */ printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); @@ -158,15 +160,15 @@ static int get_coco_image_id(char *filename) return atoi(p+1); } -static void print_cocos(FILE *fp, char *image_path, box *boxes, float **probs, int num_boxes, int classes, int w, int h) +static void print_cocos(FILE *fp, char *image_path, 
detection *dets, int num_boxes, int classes, int w, int h) { int i, j; int image_id = get_coco_image_id(image_path); for(i = 0; i < num_boxes; ++i){ - float xmin = boxes[i].x - boxes[i].w/2.; - float xmax = boxes[i].x + boxes[i].w/2.; - float ymin = boxes[i].y - boxes[i].h/2.; - float ymax = boxes[i].y + boxes[i].h/2.; + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; if (xmin < 0) xmin = 0; if (ymin < 0) ymin = 0; @@ -179,19 +181,19 @@ static void print_cocos(FILE *fp, char *image_path, box *boxes, float **probs, i float bh = ymax - ymin; for(j = 0; j < classes; ++j){ - if (probs[i][j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, probs[i][j]); + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); } } } -void print_detector_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h) +void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) { int i, j; for(i = 0; i < total; ++i){ - float xmin = boxes[i].x - boxes[i].w/2. + 1; - float xmax = boxes[i].x + boxes[i].w/2. + 1; - float ymin = boxes[i].y - boxes[i].h/2. + 1; - float ymax = boxes[i].y + boxes[i].h/2. + 1; + float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2. + 1; if (xmin < 1) xmin = 1; if (ymin < 1) ymin = 1; @@ -199,20 +201,20 @@ void print_detector_detections(FILE **fps, char *id, box *boxes, float **probs, if (ymax > h) ymax = h; for(j = 0; j < classes; ++j){ - if (probs[i][j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, probs[i][j], + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], xmin, ymin, xmax, ymax); } } } -void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int total, int classes, int w, int h) +void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) { int i, j; for(i = 0; i < total; ++i){ - float xmin = boxes[i].x - boxes[i].w/2.; - float xmax = boxes[i].x + boxes[i].w/2.; - float ymin = boxes[i].y - boxes[i].h/2.; - float ymax = boxes[i].y + boxes[i].h/2.; + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; if (xmin < 0) xmin = 0; if (ymin < 0) ymin = 0; @@ -221,7 +223,7 @@ void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int for(j = 0; j < classes; ++j){ int class = j; - if (probs[i][class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, probs[i][class], + if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class], xmin, ymin, xmax, ymax); } } @@ -277,10 +279,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char } } - - box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); - float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); - for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes+1, sizeof(float *)); + detection *dets = make_network_boxes(net); int m = plist->size; 
int i=0; @@ -334,14 +333,14 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char network_predict(net, input.data); int w = val[t].w; int h = val[t].h; - get_region_boxes(l, w, h, net->w, net->h, thresh, probs, boxes, 0, 0, map, .5, 0); - if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms); + fill_network_boxes(net, w, h, thresh, .5, map, 0, dets); + if (nms) do_nms_sort(dets, l.w*l.h*l.n, classes, nms); if (coco){ - print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h); + print_cocos(fp, path, dets, l.w*l.h*l.n, classes, w, h); } else if (imagenet){ - print_imagenet_detections(fp, i+t-nthreads+1, boxes, probs, l.w*l.h*l.n, classes, w, h); + print_imagenet_detections(fp, i+t-nthreads+1, dets, l.w*l.h*l.n, classes, w, h); } else { - print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h); + print_detector_detections(fps, id, dets, l.w*l.h*l.n, classes, w, h); } free(id); free_image(val[t]); @@ -410,10 +409,8 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out } } - - box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); - float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); - for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes+1, sizeof(float *)); + detection *dets = make_network_boxes(net); + int nboxes = num_boxes(net); int m = plist->size; int i=0; @@ -462,14 +459,14 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out network_predict(net, X); int w = val[t].w; int h = val[t].h; - get_region_boxes(l, w, h, net->w, net->h, thresh, probs, boxes, 0, 0, map, .5, 0); - if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms); + fill_network_boxes(net, w, h, thresh, .5, map, 0, dets); + if (nms) do_nms_sort(dets, nboxes, classes, nms); if (coco){ - print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h); + print_cocos(fp, path, dets, nboxes, classes, w, h); } else if (imagenet){ - print_imagenet_detections(fp, i+t-nthreads+1, boxes, probs, l.w*l.h*l.n, classes, w, h); + print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h); } else { - print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h); + print_detector_detections(fps, id, dets, nboxes, classes, w, h); } free(id); free_image(val[t]); @@ -498,12 +495,9 @@ void validate_detector_recall(char *cfgfile, char *weightfile) char **paths = (char **)list_to_array(plist); layer l = net->layers[net->n-1]; - int classes = l.classes; int j, k; - box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); - float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); - for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes+1, sizeof(float *)); + detection *dets = make_network_boxes(net); int m = plist->size; int i=0; @@ -516,6 +510,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile) int correct = 0; int proposals = 0; float avg_iou = 0; + int nboxes = num_boxes(net); for(i = 0; i < m; ++i){ char *path = paths[i]; @@ -523,8 +518,8 @@ void validate_detector_recall(char *cfgfile, char *weightfile) image sized = resize_image(orig, net->w, net->h); char *id = basecfg(path); network_predict(net, sized.data); - get_region_boxes(l, sized.w, sized.h, net->w, net->h, thresh, probs, boxes, 0, 1, 0, .5, 1); - if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms); + fill_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, dets); + if (nms) do_nms_obj(dets, nboxes, 1, nms); char labelpath[4096]; find_replace(path, "images", "labels", labelpath); @@ -534,8 +529,8 @@ void 
validate_detector_recall(char *cfgfile, char *weightfile) int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); - for(k = 0; k < l.w*l.h*l.n; ++k){ - if(probs[k][0] > thresh){ + for(k = 0; k < nboxes; ++k){ + if(dets[k].objectness > thresh){ ++proposals; } } @@ -544,8 +539,8 @@ void validate_detector_recall(char *cfgfile, char *weightfile) box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; float best_iou = 0; for(k = 0; k < l.w*l.h*l.n; ++k){ - float iou = box_iou(boxes[k], t); - if(probs[k][0] > thresh && iou > best_iou){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ best_iou = iou; } } @@ -562,6 +557,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile) } } + void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen) { list *options = read_data_cfg(datacfg); @@ -575,7 +571,6 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam double time; char buff[256]; char *input = buff; - int j; float nms=.3; while(1){ if(filename){ @@ -595,23 +590,18 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam //resize_network(net, sized.w, sized.h); layer l = net->layers[net->n-1]; - box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); - float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); - for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *)); - float **masks = 0; - if (l.coords > 4){ - masks = calloc(l.w*l.h*l.n, sizeof(float*)); - for(j = 0; j < l.w*l.h*l.n; ++j) masks[j] = calloc(l.coords-4, sizeof(float *)); - } + int nboxes = num_boxes(net); + printf("%d\n", nboxes); float *X = sized.data; time=what_time_is_it_now(); network_predict(net, X); printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time); - get_region_boxes(l, im.w, im.h, net->w, net->h, thresh, probs, boxes, masks, 0, 0, hier_thresh, 1); + detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1); //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); - if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); - draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, masks, names, alphabet, l.classes); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); + free_detections(dets, num_boxes(net)); if(outfile){ save_image(im, outfile); } @@ -630,12 +620,190 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam free_image(im); free_image(sized); - free(boxes); - free_ptrs((void **)probs, l.w*l.h*l.n); if (filename) break; } } +void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ + image **alphabet = load_alphabet(); + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + int top = option_find_int(options, "top", 1); + + char *label_list = option_find_str(options, "labels", 0); + char *name_list = 
option_find_str(options, "names", label_list); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + int count = 0; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + int nboxes = num_boxes(net); + + float *X = in_s.data; + network_predict(net, X); + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int left = b.x-b.w/2.; + int top = b.y-b.h/2.; + censor_image(in, left, top, b.w, b.h); + } + } + show_image(in, base); + cvWaitKey(10); + free_detections(dets, num_boxes(net)); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } +} + +void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ + image **alphabet = load_alphabet(); + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + int top = option_find_int(options, "top", 1); + + char *label_list = option_find_str(options, "labels", 0); + char *name_list = option_find_str(options, "names", label_list); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + int count = 0; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + int nboxes = num_boxes(net); + show_image(in, base); + + float *X = in_s.data; + network_predict(net, X); + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int size = b.w*in.w > b.h*in.h ? 
b.w*in.w : b.h*in.h; + int dx = b.x*in.w-size/2.; + int dy = b.y*in.h-size/2.; + image bim = crop_image(in, dx, dy, size, size); + char buff[2048]; + sprintf(buff, "results/extract/%07d", count); + ++count; + save_image(bim, buff); + free_image(bim); + } + } + free_detections(dets, num_boxes(net)); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } +} + +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets) +{ + network_predict_image(net, im); + layer l = net->layers[net->n-1]; + int nboxes = num_boxes(net); + fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); +} + void run_detector(int argc, char **argv) { char *prefix = find_char_arg(argc, argv, "-prefix", 0); @@ -677,12 +845,15 @@ void run_detector(int argc, char **argv) int width = find_int_arg(argc, argv, "-w", 0); int height = find_int_arg(argc, argv, "-h", 0); int fps = find_int_arg(argc, argv, "-fps", 0); + int class = find_int_arg(argc, argv, "-class", 0); char *datacfg = argv[3]; char *cfg = argv[4]; char *weights = (argc > 5) ? argv[5] : 0; char *filename = (argc > 6) ? argv[6]: 0; if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen); + else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); + else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile); diff --git a/examples/lsd.c b/examples/lsd.c index c5977483..ad337644 100644 --- a/examples/lsd.c +++ b/examples/lsd.c @@ -383,7 +383,31 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear } */ -void test_dcgan(char *cfgfile, char *weightfile) +void slerp(float *start, float *end, float s, int n, float *out) +{ + float omega = acos(dot_cpu(n, start, 1, end, 1)); + float so = sin(omega); + fill_cpu(n, 0, out, 1); + axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1); + axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1); + + float mag = mag_array(out, n); + scale_array(out, n, 1./mag); +} + +image random_unit_vector_image(int w, int h, int c) +{ + image im = make_image(w, h, c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + float mag = mag_array(im.data, im.w*im.h*im.c); + scale_array(im.data, im.w*im.h*im.c, 1./mag); + return im; +} + +void inter_dcgan(char *cfgfile, char *weightfile) { network *net = load_network(cfgfile, weightfile, 0); set_batch_network(net, 1); @@ -401,6 +425,62 @@ void test_dcgan(char *cfgfile, char *weightfile) break; } } + image start = random_unit_vector_image(net->w, net->h, net->c); + image end = random_unit_vector_image(net->w, net->h, net->c); + image im = make_image(net->w, net->h, net->c); + image orig = copy_image(start); + + int c = 0; + int count = 0; + int max_count = 15; + while(1){ + ++c; + + if(count == max_count){ + count = 0; + free_image(start); + start = end; + end = random_unit_vector_image(net->w, net->h, net->c); + if(c > 300){ + end = orig; + } + 
if(c>300 + max_count) return; + } + ++count; + + slerp(start.data, end.data, (float)count / max_count, im.w*im.h*im.c, im.data); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + //char buff[256]; + sprintf(buff, "out%05d", c); + show_image(out, "out"); + save_image(out, "out"); + save_image(out, buff); +#ifdef OPENCV + //cvWaitKey(0); +#endif + + } +} + +void test_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + imlayer = net->n-1; while(1){ image im = make_image(net->w, net->h, net->c); @@ -408,6 +488,8 @@ void test_dcgan(char *cfgfile, char *weightfile) for(i = 0; i < im.w*im.h*im.c; ++i){ im.data[i] = rand_normal(); } + //float mag = mag_array(im.data, im.w*im.h*im.c); + //scale_array(im.data, im.w*im.h*im.c, 1./mag); float *X = im.data; time=clock(); @@ -426,21 +508,177 @@ void test_dcgan(char *cfgfile, char *weightfile) } } -void dcgan_batch(network gnet, network anet) +void set_network_alpha_beta(network *net, float alpha, float beta) { - //float *input = calloc(x_size, sizeof(float)); + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == SHORTCUT){ + net->layers[i].alpha = alpha; + net->layers[i].beta = beta; + } + } } - -void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images) +void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = gnet->layers[gnet->n-1]; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + float aloss_avg = -1; + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + { + int cb = get_current_batch(gnet); + float alpha = (float) cb / (maxbatch/2); + if(alpha > 1) alpha = 1; + float beta = 1 - alpha; + printf("%f %f\n", alpha, beta); + set_network_alpha_beta(gnet, alpha, beta); + set_network_alpha_beta(anet, beta, alpha); + } + + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] 
= 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for (j = 0; j < gnet->subdivisions; ++j) { + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + /* + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + */ + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + float genaloss = *anet->cost / anet->batch; + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + float aloss = train_network(anet, merge); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen"); + show_image(im2, "train"); + save_image(im, "gen"); + save_image(im2, "train"); + cvWaitKey(1); + } +#endif + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) { #ifdef GPU - //char *train_images = "/home/pjreddie/data/coco/train1.txt"; - //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; - //char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; - //char *train_images = "data/64.txt"; - //char *train_images = "data/alp.txt"; - //char *train_images = "data/cifar.txt"; char *backup_directory = "/home/pjreddie/backup/"; srand(time(0)); char *base = basecfg(cfg); @@ -450,7 +688,6 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, network *anet = load_network(acfg, aweight, clear); //float orig_rate = anet->learning_rate; - int start = 0; int i, j, k; layer imlayer = {0}; for (i = 0; i < gnet->n; ++i) { @@ -497,8 +734,8 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, //data generated = copy_data(train); - while 
(get_current_batch(gnet) < gnet->max_batches) { - start += 1; + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { i += 1; time=clock(); pthread_join(load_thread, 0); @@ -513,8 +750,8 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, data gen = copy_data(train); for (j = 0; j < imgs; ++j) { - train.y.vals[j][0] = .95; - gen.y.vals[j][0] = .05; + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; } time=clock(); @@ -524,31 +761,50 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, for(z = 0; z < x_size; ++z){ gnet->input[z] = rand_normal(); } + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + /* + for(z = 0; z < 100; ++z){ + printf("%f, ", gnet->input[z]); + } + printf("\n"); + printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size)); + */ - cuda_push_array(gnet->input_gpu, gnet->input, x_size); - cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); + //cuda_push_array(gnet->input_gpu, gnet->input, x_size); + //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); *gnet->seen += gnet->batch; - forward_network_gpu(gnet); + forward_network(gnet); fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); - fill_gpu(anet->truths*anet->batch, .95, anet->truth_gpu, 1); - copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); anet->delta_gpu = imerror; - forward_network_gpu(anet); - backward_network_gpu(anet); + forward_network(anet); + backward_network(anet); float genaloss = *anet->cost / anet->batch; - printf("%f\n", genaloss); + //printf("%f\n", genaloss); scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); - scal_gpu(imlayer.outputs*imlayer.batch, .00, gnet->layers[gnet->n-1].delta_gpu, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); - printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); - printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); - backward_network_gpu(gnet); + backward_network(gnet); + + /* + for(k = 0; k < gnet->n; ++k){ + layer l = gnet->layers[k]; + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); + printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, l.outputs*l.batch)); + } + */ for(k = 0; k < gnet->batch; ++k){ int index = j*gnet->batch + k; @@ -565,23 +821,25 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, //scale_image(im, .5); //translate_image(im2, 1); //scale_image(im2, .5); - #ifdef OPENCV +#ifdef OPENCV if(display){ image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); show_image(im, "gen"); show_image(im2, "train"); - cvWaitKey(50); + save_image(im, "gen"); + save_image(im2, "train"); + cvWaitKey(1); } - #endif +#endif -/* - if(aloss < .1){ - anet->learning_rate = 0; - } 
else if (aloss > .3){ - anet->learning_rate = orig_rate; - } - */ + /* + if(aloss < .1){ + anet->learning_rate = 0; + } else if (aloss > .3){ + anet->learning_rate = orig_rate; + } + */ update_network_gpu(gnet); @@ -747,15 +1005,15 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle update_network_gpu(net); - #ifdef OPENCV +#ifdef OPENCV if(display){ image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]); image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); show_image(im, "gen"); show_image(im2, "train"); - cvWaitKey(50); + cvWaitKey(1); } - #endif +#endif free_data(merge); free_data(train); free_data(gray); @@ -786,259 +1044,259 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle } /* -void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) -{ + void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) + { #ifdef GPU - char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; - char *backup_directory = "/home/pjreddie/backup/"; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - if(clear) *net->seen = 0; +char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; +char *backup_directory = "/home/pjreddie/backup/"; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ +load_weights(&net, weightfile); +} +if(clear) *net->seen = 0; - char *abase = basecfg(acfgfile); - network anet = parse_network_cfg(acfgfile); - if(aweightfile){ - load_weights(&anet, aweightfile); - } - if(clear) *anet->seen = 0; +char *abase = basecfg(acfgfile); +network anet = parse_network_cfg(acfgfile); +if(aweightfile){ +load_weights(&anet, aweightfile); +} +if(clear) *anet->seen = 0; - int i, j, k; - layer imlayer = {0}; - for (i = 0; i < net->n; ++i) { - if (net->layers[i].out_c == 3) { - imlayer = net->layers[i]; - break; +int i, j, k; +layer imlayer = {0}; +for (i = 0; i < net->n; ++i) { +if (net->layers[i].out_c == 3) { +imlayer = net->layers[i]; +break; +} +} + +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = net->batch*net->subdivisions; +i = *net->seen/imgs; +data train, buffer; + + +list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; + +network_state gstate = {0}; +gstate.index = 0; +gstate.net = net; +int x_size = get_network_input_size(net)*net->batch; +int y_size = 1*net->batch; +gstate.input = cuda_make_array(0, x_size); +gstate.truth = 0; +gstate.delta = 0; +gstate.train = 1; +float *X = calloc(x_size, sizeof(float)); +float *y = calloc(y_size, sizeof(float)); + +network_state astate = {0}; +astate.index = 0; +astate.net = anet; +int ay_size = 
get_network_output_size(anet)*anet->batch; +astate.input = 0; +astate.truth = 0; +astate.delta = 0; +astate.train = 1; + +float *imerror = cuda_make_array(0, imlayer.outputs); +float *ones_gpu = cuda_make_array(0, ay_size); +fill_gpu(ay_size, 1, ones_gpu, 1); + +float aloss_avg = -1; +float gloss_avg = -1; + +//data generated = copy_data(train); + +while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, X, y); + cuda_push_array(gstate.input, X, x_size); + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, 1, imerror, 1); + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + backward_network_gpu(net, gstate); + + printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs)); + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = 0; } } + harmless_update_network_gpu(anet); - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); - int imgs = net->batch*net->subdivisions; - i = *net->seen/imgs; - data train, buffer; + data merge = concat_data(train, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(generated); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; - list *plist = get_paths(train_images); - //int N = plist->size; - char **paths = (char **)list_to_array(plist); - - load_args args = {0}; - args.w = net->w; - args.h = net->h; - args.paths = paths; - args.n = imgs; - args.m = plist->size; - args.d = &buffer; - - args.min = net->min_crop; - args.max = net->max_crop; - args.angle = net->angle; - args.aspect = net->aspect; - args.exposure = net->exposure; - args.saturation = net->saturation; - args.hue = net->hue; - args.size = net->w; - args.type = CLASSIFICATION_DATA; - args.classes = 1; - char *ls[1] = {"coco"}; - args.labels = ls; - - pthread_t load_thread = load_data_in_thread(args); - clock_t time; - - network_state gstate = {0}; - gstate.index = 0; - gstate.net = net; - int x_size = get_network_input_size(net)*net->batch; - int y_size = 1*net->batch; - gstate.input = cuda_make_array(0, x_size); - gstate.truth = 0; - gstate.delta = 0; - gstate.train = 1; - float *X = calloc(x_size, sizeof(float)); - float *y = calloc(y_size, sizeof(float)); - - network_state astate = {0}; - astate.index = 0; - astate.net = anet; - int ay_size = get_network_output_size(anet)*anet->batch; - astate.input = 0; - astate.truth = 0; - astate.delta = 0; - astate.train = 1; - - float *imerror 
= cuda_make_array(0, imlayer.outputs); - float *ones_gpu = cuda_make_array(0, ay_size); - fill_gpu(ay_size, 1, ones_gpu, 1); - - float aloss_avg = -1; - float gloss_avg = -1; - - //data generated = copy_data(train); - - while (get_current_batch(net) < net->max_batches) { - i += 1; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data_in_thread(args); - - printf("Loaded: %lf seconds\n", sec(clock()-time)); - - data generated = copy_data(train); - time=clock(); - float gloss = 0; - - for(j = 0; j < net->subdivisions; ++j){ - get_next_batch(train, net->batch, j*net->batch, X, y); - cuda_push_array(gstate.input, X, x_size); - *net->seen += net->batch; - forward_network_gpu(net, gstate); - - fill_gpu(imlayer.outputs, 0, imerror, 1); - astate.input = imlayer.output_gpu; - astate.delta = imerror; - astate.truth = ones_gpu; - forward_network_gpu(anet, astate); - backward_network_gpu(anet, astate); - - scal_gpu(imlayer.outputs, 1, imerror, 1); - axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); - - backward_network_gpu(net, gstate); - - printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs)); - printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); - - gloss += get_network_cost(net) /(net->subdivisions*net->batch); - - cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); - for(k = 0; k < net->batch; ++k){ - int index = j*net->batch + k; - copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); - generated.y.vals[index][0] = 0; - } - } - harmless_update_network_gpu(anet); - - data merge = concat_data(train, generated); - randomize_data(merge); - float aloss = train_network(anet, merge); - - update_network_gpu(net); - update_network_gpu(anet); - free_data(merge); - free_data(train); - free_data(generated); - if (aloss_avg < 0) aloss_avg = aloss; - aloss_avg = aloss_avg*.9 + aloss*.1; - gloss_avg = gloss_avg*.9 + gloss*.1; - - printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); - if(i%1000==0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); - save_weights(anet, buff); - } - if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); - save_weights(net, buff); - sprintf(buff, "%s/%s.backup", backup_directory, abase); - save_weights(anet, buff); - } + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); #endif } */ /* -void train_lsd(char *cfgfile, char *weightfile, int clear) -{ - char *train_images = 
"/home/pjreddie/data/coco/trainvalno5k.txt"; - char *backup_directory = "/home/pjreddie/backup/"; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - float avg_loss = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - if(clear) *net->seen = 0; - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); - int imgs = net->batch*net->subdivisions; - int i = *net->seen/imgs; - data train, buffer; + void train_lsd(char *cfgfile, char *weightfile, int clear) + { + char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + if(clear) *net->seen = 0; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; - list *plist = get_paths(train_images); - //int N = plist->size; - char **paths = (char **)list_to_array(plist); + list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); - load_args args = {0}; - args.w = net->w; - args.h = net->h; - args.paths = paths; - args.n = imgs; - args.m = plist->size; - args.d = &buffer; +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; - args.min = net->min_crop; - args.max = net->max_crop; - args.angle = net->angle; - args.aspect = net->aspect; - args.exposure = net->exposure; - args.saturation = net->saturation; - args.hue = net->hue; - args.size = net->w; - args.type = CLASSIFICATION_DATA; - args.classes = 1; - char *ls[1] = {"coco"}; - args.labels = ls; +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; - pthread_t load_thread = load_data_in_thread(args); - clock_t time; - //while(i*imgs < N*120){ - while(get_current_batch(net) < net->max_batches){ - i += 1; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data_in_thread(args); +pthread_t load_thread = load_data_in_thread(args); +clock_t time; +//while(i*imgs < N*120){ +while(get_current_batch(net) < net->max_batches){ +i += 1; +time=clock(); +pthread_join(load_thread, 0); +train = buffer; +load_thread = load_data_in_thread(args); - printf("Loaded: %lf seconds\n", sec(clock()-time)); +printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); - float loss = train_network(net, train); - if (avg_loss < 0) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; +time=clock(); +float loss = train_network(net, train); +if (avg_loss < 0) avg_loss = loss; +avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); - if(i%1000==0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); - save_weights(net, buff); - 
} - free_data(train); - } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); +printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); +if(i%1000==0){ +char buff[256]; +sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); +save_weights(net, buff); +} +if(i%100==0){ +char buff[256]; +sprintf(buff, "%s/%s.backup", backup_directory, base); +save_weights(net, buff); +} +free_data(train); +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); } */ @@ -1107,6 +1365,7 @@ void run_lsd(int argc, char **argv) int clear = find_arg(argc, argv, "-clear"); int display = find_arg(argc, argv, "-display"); + int batches = find_int_arg(argc, argv, "-b", 0); char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list"); char *cfg = argv[3]; @@ -1118,9 +1377,11 @@ void run_lsd(int argc, char **argv) //else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear); //else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear); //else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear); - if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file); + if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "trainprog")) train_prog(cfg, weights, acfg, aweights, clear, display, file, batches); else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear, display); else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "inter")) inter_dcgan(cfg, weights); else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0); else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1); /* diff --git a/examples/nightmare.c b/examples/nightmare.c index 71d38334..8ec6e966 100644 --- a/examples/nightmare.c +++ b/examples/nightmare.c @@ -83,6 +83,10 @@ void optimize_picture(network *net, image orig, int max_layer, float scale, floa */ //rate = rate / abs_mean(out.data, out.w*out.h*out.c); + image gray = make_image(out.w, out.h, out.c); + fill_image(gray, .5); + axpy_cpu(orig.w*orig.h*orig.c, -1, orig.data, 1, gray.data, 1); + axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1); if(norm) normalize_array(out.data, out.w*out.h*out.c); axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1); diff --git a/examples/regressor.c b/examples/regressor.c index 00c55aae..60a9f2b9 100644 --- a/examples/regressor.c +++ b/examples/regressor.c @@ -32,6 +32,7 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, char *backup_directory = option_find_str(options, "backup", "/backup/"); char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 1); list *plist = get_paths(train_list); char **paths = (char **)list_to_array(plist); @@ -43,9 +44,10 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, args.w = net->w; args.h = net->h; args.threads = 32; + args.classes = classes; - args.min = net->min_crop; - args.max = net->max_crop; + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; args.angle = net->angle; args.aspect = net->aspect; 
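A note on the optimize_picture() change in examples/nightmare.c above: the added fill_image/axpy_cpu calls mix a small pull toward mid-gray into the gradient before it is applied to the image. The sketch below is standalone C rather than darknet source; it only assumes axpy_cpu has the usual saxpy meaning, y[i] += a*x[i], which matches the definition in src/blas.c.

    /* Standalone sketch of the gray-blend step (not darknet source). */
    #include <stdio.h>

    static void axpy_cpu(int n, float a, float *x, int incx, float *y, int incy)
    {
        int i;
        for(i = 0; i < n; ++i) y[i*incy] += a * x[i*incx];
    }

    int main(void)
    {
        float orig[4] = {0.2f, 0.9f, 0.5f, 0.0f};  /* current image pixels   */
        float out[4]  = {0.3f, -0.1f, 0.0f, 0.6f}; /* gradient w.r.t. pixels */
        float gray[4] = {0.5f, 0.5f, 0.5f, 0.5f};  /* fill_image(gray, .5)   */
        int i;

        axpy_cpu(4, -1, orig, 1, gray, 1); /* gray becomes 0.5 - orig   */
        axpy_cpu(4, .1, gray, 1, out, 1);  /* out += 0.1 * (0.5 - orig) */

        /* The gradient now carries a 10% pull toward mid-gray, which keeps
         * pixel values from drifting to extremes over many iterations. */
        for(i = 0; i < 4; ++i) printf("%f\n", out[i]);
        return 0;
    }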
args.exposure = net->exposure; @@ -160,6 +162,10 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde }else{ cap = cvCaptureFromCAM(cam_index); } + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 1); + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); if(!cap) error("Couldn't connect to webcam.\n"); cvNamedWindow("Regressor", CV_WINDOW_NORMAL); @@ -171,19 +177,23 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde gettimeofday(&tval_before, NULL); image in = get_image_from_stream(cap); - image in_s = letterbox_image(in, net->w, net->h); - show_image(in, "Regressor"); + image crop = center_crop_image(in, net->w, net->h); + grayscale_image_3c(crop); + show_image(crop, "Regressor"); - float *predictions = network_predict(net, in_s.data); + float *predictions = network_predict(net, crop.data); printf("\033[2J"); printf("\033[1;1H"); printf("\nFPS:%.0f\n",fps); - printf("People: %f\n", predictions[0]); + int i; + for(i = 0; i < classes; ++i){ + printf("%s: %f\n", names[i], predictions[i]); + } - free_image(in_s); free_image(in); + free_image(crop); cvWaitKey(10); diff --git a/examples/super.c b/examples/super.c index 506b065e..79799d05 100644 --- a/examples/super.c +++ b/examples/super.c @@ -93,6 +93,8 @@ void test_super(char *cfgfile, char *weightfile, char *filename) image out = get_network_image(net); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); save_image(out, "out"); + show_image(out, "out"); + cvWaitKey(0); free_image(im); if (filename) break; diff --git a/examples/yolo.c b/examples/yolo.c index 9174b401..af4b8b50 100644 --- a/examples/yolo.c +++ b/examples/yolo.c @@ -74,14 +74,14 @@ void train_yolo(char *cfgfile, char *weightfile) save_weights(net, buff); } -void print_yolo_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h) +void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) { int i, j; for(i = 0; i < total; ++i){ - float xmin = boxes[i].x - boxes[i].w/2.; - float xmax = boxes[i].x + boxes[i].w/2.; - float ymin = boxes[i].y - boxes[i].h/2.; - float ymax = boxes[i].y + boxes[i].h/2.; + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; if (xmin < 0) xmin = 0; if (ymin < 0) ymin = 0; @@ -89,7 +89,7 @@ void print_yolo_detections(FILE **fps, char *id, box *boxes, float **probs, int if (ymax > h) ymax = h; for(j = 0; j < classes; ++j){ - if (probs[i][j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, probs[i][j], + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], xmin, ymin, xmax, ymax); } } @@ -118,9 +118,6 @@ void validate_yolo(char *cfg, char *weights) snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); fps[j] = fopen(buff, "w"); } - box *boxes = calloc(l.side*l.side*l.n, sizeof(box)); - float **probs = calloc(l.side*l.side*l.n, sizeof(float *)); - for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); int m = plist->size; int i=0; @@ -136,6 +133,7 @@ void validate_yolo(char *cfg, char *weights) image *buf = calloc(nthreads, sizeof(image)); image *buf_resized = calloc(nthreads, sizeof(image)); pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + detection *dets = make_network_boxes(net); load_args args = {0}; 
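The validate_yolo, validate_yolo_recall, and test_yolo hunks above move from the parallel boxes/probs arrays to the new detection struct. As a rough guide to the refactored flow, here is a single-image sketch written against the declarations this patch adds to include/darknet.h (make_network_boxes, fill_network_boxes, do_nms_sort, free_detections). The loader name load_network is inferred from the Python binding later in the patch, and the fill_network_boxes arguments mirror the recall path; treat this as hedged pseudocode, not a drop-in tool.

    /* Hedged sketch of the detection-based API introduced in this patch. */
    #include <stdio.h>
    #include "darknet.h"

    void detect_one(char *cfg, char *weights, char *path, float thresh, float nms)
    {
        network *net = load_network(cfg, weights, 0);
        set_batch_network(net, 1);

        image im = load_image_color(path, 0, 0);
        image sized = letterbox_image(im, net->w, net->h);
        network_predict(net, sized.data);

        int n = num_boxes(net);                    /* number of detection slots */
        detection *dets = make_network_boxes(net); /* allocated once, reusable  */
        fill_network_boxes(net, im.w, im.h, thresh, 0, 0, 1, dets);

        layer l = net->layers[net->n - 1];
        if(nms) do_nms_sort(dets, n, l.classes, nms);

        int i, j;
        for(i = 0; i < n; ++i){
            for(j = 0; j < l.classes; ++j){
                if(dets[i].prob[j] > thresh){
                    box b = dets[i].bbox;
                    printf("class %d: %.2f at (%.2f, %.2f, %.2f, %.2f)\n",
                           j, dets[i].prob[j], b.x, b.y, b.w, b.h);
                }
            }
        }

        free_detections(dets, n);
        free_image(sized);
        free_image(im);
        free_network(net);
    }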
args.w = net->w; @@ -169,9 +167,9 @@ void validate_yolo(char *cfg, char *weights) network_predict(net, X); int w = val[t].w; int h = val[t].h; - get_detection_boxes(l, w, h, thresh, probs, boxes, 0); - if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, classes, iou_thresh); - print_yolo_detections(fps, id, boxes, probs, l.side*l.side*l.n, classes, w, h); + fill_network_boxes(net, w, h, thresh, 0, 0, 0, dets); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets); free(id); free_image(val[t]); free_image(val_resized[t]); @@ -202,9 +200,7 @@ void validate_yolo_recall(char *cfg, char *weights) snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); fps[j] = fopen(buff, "w"); } - box *boxes = calloc(side*side*l.n, sizeof(box)); - float **probs = calloc(side*side*l.n, sizeof(float *)); - for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); + detection *dets = make_network_boxes(net); int m = plist->size; int i=0; @@ -224,8 +220,9 @@ void validate_yolo_recall(char *cfg, char *weights) image sized = resize_image(orig, net->w, net->h); char *id = basecfg(path); network_predict(net, sized.data); - get_detection_boxes(l, orig.w, orig.h, thresh, probs, boxes, 1); - if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms); + + fill_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, dets); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); char labelpath[4096]; find_replace(path, "images", "labels", labelpath); @@ -236,7 +233,7 @@ void validate_yolo_recall(char *cfg, char *weights) int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); for(k = 0; k < side*side*l.n; ++k){ - if(probs[k][0] > thresh){ + if(dets[k].objectness > thresh){ ++proposals; } } @@ -245,8 +242,8 @@ void validate_yolo_recall(char *cfg, char *weights) box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; float best_iou = 0; for(k = 0; k < side*side*l.n; ++k){ - float iou = box_iou(boxes[k], t); - if(probs[k][0] > thresh && iou > best_iou){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ best_iou = iou; } } @@ -273,11 +270,8 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) clock_t time; char buff[256]; char *input = buff; - int j; float nms=.4; - box *boxes = calloc(l.side*l.side*l.n, sizeof(box)); - float **probs = calloc(l.side*l.side*l.n, sizeof(float *)); - for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); + detection *dets = make_network_boxes(net); while(1){ if(filename){ strncpy(input, filename, 256); @@ -294,9 +288,11 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) time=clock(); network_predict(net, X); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0); - if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms); - draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, 0, voc_names, alphabet, 20); + + fill_network_boxes(net, 1, 1, thresh, 0, 0, 0, dets); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20); save_image(im, "predictions"); show_image(im, "predictions"); diff --git a/include/darknet.h b/include/darknet.h index 5fa2ec17..81a01957 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -51,6 +51,7 @@ typedef struct{ int *group_size; int *group_offset; 
} tree; +tree *read_tree(char *filename); typedef enum{ LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN @@ -85,11 +86,14 @@ typedef enum { XNOR, REGION, REORG, + UPSAMPLE, + LOGXENT, + L2NORM, BLANK } LAYER_TYPE; typedef enum{ - SSE, MASKED, L1, SEG, SMOOTH + SSE, MASKED, L1, SEG, SMOOTH,WGAN } COST_TYPE; typedef struct{ @@ -160,18 +164,20 @@ struct layer{ float shift; float ratio; float learning_rate_scale; + float clip; int softmax; int classes; int coords; int background; int rescore; int objectness; - int does_cost; int joint; int noadjust; int reorg; int log; int tanh; + int *mask; + int total; float alpha; float beta; @@ -184,13 +190,17 @@ struct layer{ float class_scale; int bias_match; int random; + float ignore_thresh; + float truth_thresh; float thresh; + float focus; int classfix; int absolute; int onlyforward; int stopbackward; int dontload; + int dontsave; int dontloadscales; float temperature; @@ -228,6 +238,7 @@ struct layer{ float * delta; float * output; + float * loss; float * squared; float * norms; @@ -389,6 +400,7 @@ struct layer{ float * scale_change_gpu; float * output_gpu; + float * loss_gpu; float * delta_gpu; float * rand_gpu; float * squared_gpu; @@ -470,6 +482,7 @@ typedef struct network{ int train; int index; float *cost; + float clip; #ifdef GPU float *input_gpu; @@ -501,6 +514,15 @@ typedef struct{ float x, y, w, h; } box; +typedef struct detection{ + box bbox; + int classes; + float *prob; + float *mask; + float objectness; + int sort_class; +} detection; + typedef struct matrix{ int rows, cols; float **vals; @@ -590,9 +612,11 @@ void backward_network(network *net); void update_network(network *net); +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); void softmax(float *input, int n, float temp, int stride, float *output); @@ -618,6 +642,8 @@ float train_networks(network **nets, int n, data d, int interval); void sync_nets(network **nets, int n, int interval); void harmless_update_network_gpu(network *net); #endif +image get_label(image **characters, char *string, int size); +void draw_label(image a, int r, int c, image label, const float *rgb); void save_image_png(image im, const char *name); void get_next_batch(data d, int n, int offset, float *X, float *y); void grayscale_image_3c(image im); @@ -644,7 +670,7 @@ void rgbgr_weights(layer l); image *get_weights(layer l); void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen); -void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); char *option_find_str(list *l, char *key, char *def); int option_find_int(list *l, char *key, int def); @@ -656,7 +682,7 @@ void save_weights_upto(network *net, char *filename, int cutoff); void load_weights_upto(network *net, char *filename, int start, int cutoff); void zero_objectness(layer l); -void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, 
box *boxes, float **masks, int only_objectness, int *map, float tree_thresh, int relative); +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); void free_network(network *net); void set_batch_network(network *net, int b); void set_temp_network(network *net, float t); @@ -664,8 +690,10 @@ image load_image(char *filename, int w, int h, int c); image load_image_color(char *filename, int w, int h); image make_image(int w, int h, int c); image resize_image(image im, int w, int h); +void censor_image(image im, int dx, int dy, int w, int h); image letterbox_image(image im, int w, int h); image crop_image(image im, int dx, int dy, int w, int h); +image center_crop_image(image im, int w, int h); image resize_min(image im, int min); image resize_max(image im, int max); image threshold_image(image im, float thresh); @@ -697,11 +725,10 @@ double what_time_is_it_now(); image rotate_image(image m, float rad); void visualize_network(network *net); float box_iou(box a, box b); -void do_nms(box *boxes, float **probs, int total, int classes, float thresh); data load_all_cifar10(); box_label *read_boxes(char *filename, int *n); box float_to_box(float *f, int stride); -void draw_detections(image im, int num, float thresh, box *boxes, float **probs, float **masks, char **names, image **alphabet, int classes); +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); matrix network_predict_data(network *net, data test); image **load_alphabet(); @@ -711,15 +738,18 @@ float *network_predict(network *net, float *input); int network_width(network *net); int network_height(network *net); float *network_predict_image(network *net, image im); -void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, box *boxes, float **probs); +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets); int num_boxes(network *net); -box *make_boxes(network *net); +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative); +void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets); +detection *make_network_boxes(network *net); +void free_detections(detection *dets, int n); void reset_network_state(network *net, int b); char **get_labels(char *filename); -void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh); -void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh); +void do_nms_obj(detection *dets, int total, int classes, float thresh); +void do_nms_sort(detection *dets, int total, int classes, float thresh); matrix make_matrix(int rows, int cols); @@ -758,11 +788,13 @@ void free_list(list *l); float mse_array(float *a, int n); float variance_array(float *a, int n); float mag_array(float *a, int n); +void scale_array(float *a, int n, float s); float mean_array(float *a, int n); float sum_array(float *a, int n); void normalize_array(float *a, int n); int *read_intlist(char *s, int *n, int d); size_t rand_size_t(); float rand_normal(); +float rand_uniform(float min, float max); #endif diff --git a/python/darknet.py b/python/darknet.py index 398a2913..203096b5 100644 --- a/python/darknet.py +++ b/python/darknet.py @@ -23,6 +23,15 @@ class BOX(Structure): ("w", c_float), ("h", c_float)] +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + 
("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + class IMAGE(Structure): _fields_ = [("w", c_int), ("h", c_int), @@ -53,9 +62,16 @@ make_image = lib.make_image make_image.argtypes = [c_int, c_int, c_int] make_image.restype = IMAGE -make_boxes = lib.make_boxes -make_boxes.argtypes = [c_void_p] -make_boxes.restype = POINTER(BOX) +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] free_ptrs = lib.free_ptrs free_ptrs.argtypes = [POINTER(c_void_p), c_int] @@ -64,12 +80,8 @@ num_boxes = lib.num_boxes num_boxes.argtypes = [c_void_p] num_boxes.restype = c_int -make_probs = lib.make_probs -make_probs.argtypes = [c_void_p] -make_probs.restype = POINTER(POINTER(c_float)) - -detect = lib.network_predict -detect.argtypes = [c_void_p, IMAGE, c_float, c_float, c_float, POINTER(BOX), POINTER(POINTER(c_float))] +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] reset_rnn = lib.reset_rnn reset_rnn.argtypes = [c_void_p] @@ -78,6 +90,12 @@ load_net = lib.load_network load_net.argtypes = [c_char_p, c_char_p, c_int] load_net.restype = c_void_p +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + free_image = lib.free_image free_image.argtypes = [IMAGE] @@ -100,21 +118,6 @@ predict_image = lib.network_predict_image predict_image.argtypes = [c_void_p, IMAGE] predict_image.restype = POINTER(c_float) -network_detect = lib.network_detect -network_detect.argtypes = [c_void_p, IMAGE, c_float, c_float, c_float, POINTER(BOX), POINTER(POINTER(c_float))] - -import numpy -def array_to_image(arr): - arr = arr.copy() - arr = arr.transpose(2,0,1) - c = arr.shape[0] - h = arr.shape[1] - w = arr.shape[2] - arr = (arr.astype(numpy.float32)/255.0).flatten() - data = c_array(c_float, arr) - im = IMAGE(w,h,c,data) - return im - def classify(net, meta, im): out = predict_image(net, im) res = [] @@ -124,24 +127,21 @@ def classify(net, meta, im): return res def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): - if type(image) == numpy.ndarray: - im = array_to_image(image) - else: - im = load_image(image, 0, 0) - boxes = make_boxes(net) - probs = make_probs(net) + im = load_image(image, 0, 0) num = num_boxes(net) - network_detect(net, im, thresh, hier_thresh, nms, boxes, probs) + predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0) + if (nms): do_nms_obj(dets, num, meta.classes, nms); + res = [] for j in range(num): for i in range(meta.classes): - if probs[j][i] > 0: - res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h))) + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) res = sorted(res, key=lambda x: -x[1]) - - if type(image) != numpy.ndarray: - free_image(im) - free_ptrs(cast(probs, POINTER(c_void_p)), num) + free_image(im) + free_detections(dets, num) return res if __name__ == "__main__": @@ -153,6 +153,4 @@ if __name__ == "__main__": net = 
load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) meta = load_meta("cfg/coco.data") r = detect(net, meta, "data/dog.jpg") - print(r) - - + print r diff --git a/src/blas.c b/src/blas.c index d25c1969..a1bb5554 100644 --- a/src/blas.c +++ b/src/blas.c @@ -65,7 +65,7 @@ void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, floa } } -void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) { int stride = w1/w2; int sample = w2/w1; @@ -84,7 +84,7 @@ void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, for(i = 0; i < minw; ++i){ int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); - out[out_index] += add[add_index]; + out[out_index] = s1*out[out_index] + s2*add[add_index]; } } } @@ -123,6 +123,27 @@ void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, fl } } +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial) +{ + int b,f,i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < spatial; ++i){ + float sum = 0; + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + sum += powf(x[index], 2); + } + sum = sqrtf(sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } + } + } +} + + void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) { int b, f, i; @@ -241,6 +262,28 @@ void l1_cpu(int n, float *pred, float *truth, float *delta, float *error) } } +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? 
-log(p) : 0; + delta[i] = t-p; + } +} + +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p) - (1-t)*log(1-p); + delta[i] = t-p; + } +} + void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) { int i; @@ -288,3 +331,21 @@ void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, i } } +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + int i, j, k, b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h*stride; ++j){ + for(i = 0; i < w*stride; ++i){ + int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride; + int out_index = b*w*h*c + k*w*h + j*w + i; + if(forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } +} + + diff --git a/src/blas.h b/src/blas.h index a8408f32..707291de 100644 --- a/src/blas.h +++ b/src/blas.h @@ -19,10 +19,8 @@ void constrain_gpu(int N, float ALPHA, float * X, int INCX); void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); -void fill_cpu(int N, float ALPHA, float * X, int INCX); -float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); int test_gpu_blas(); -void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); @@ -32,15 +30,19 @@ void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial); void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); void l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc); void softmax(float *input, int n, float temp, int stride, float *output); void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); #ifdef GPU #include "cuda.h" @@ -52,7 +54,7 @@ void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); void add_gpu(int N, float ALPHA, float * X, int INCX); 
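A quick numeric check of the new cross-entropy helpers added to src/blas.c above. logistic_x_ent_cpu is copied verbatim; the toy main() and sample values are mine. The point is the sign convention: error[] holds the per-element loss, while delta[] = t - p is the negative gradient of that loss with respect to the pre-sigmoid logit, which is what darknet's backward passes expect to accumulate.

    #include <math.h>
    #include <stdio.h>

    void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
    {
        int i;
        for(i = 0; i < n; ++i){
            float t = truth[i];
            float p = pred[i];
            error[i] = -t*log(p) - (1-t)*log(1-p);
            delta[i] = t-p;
        }
    }

    int main(void)
    {
        float pred[3]  = {0.9f, 0.2f, 0.5f};   /* sigmoid outputs */
        float truth[3] = {1.0f, 0.0f, 1.0f};
        float delta[3], error[3];
        int i;

        logistic_x_ent_cpu(3, pred, truth, delta, error);
        for(i = 0; i < 3; ++i)
            printf("p=%.1f t=%.0f  loss=%.3f  delta=%.3f\n",
                   pred[i], truth[i], error[i], delta[i]);
        /* e.g. p=0.9, t=1 -> loss=0.105, delta=+0.1: a small push to raise the logit. */
        return 0;
    }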
void supp_gpu(int N, float ALPHA, float * X, int INCX); -void mask_gpu(int N, float * X, float mask_num, float * mask); +void mask_gpu(int N, float * X, float mask_num, float * mask, float val); void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale); void const_gpu(int N, float ALPHA, float *X, int INCX); void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); @@ -61,6 +63,7 @@ void mul_gpu(int N, float *X, int INCX, float *Y, int INCY); void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial); void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); @@ -69,16 +72,19 @@ void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *varianc void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); -void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); void add_bias_gpu(float *output, float *biases, int batch, int n, int size); void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); +void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); void l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error); void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); void mult_add_into_gpu(int num, float *a, float *b, float *c); @@ -93,6 +99,7 @@ void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rat void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out); void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); #endif #endif diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index a483f2eb..47e82179 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -164,8 +164,11 @@ __global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float { int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (index >= N) return; + + float mhat = m[index] / (1.f - powf(B1, t)); + float vhat = v[index] / (1.f - powf(B2, t)); - x[index] = x[index] + (rate * 
sqrtf(1.f-powf(B2, t)) / (1.f-powf(B1, t)) * m[index] / (sqrtf(v[index]) + eps));
+    x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps);
 }
 
 extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
@@ -446,12 +449,6 @@ __global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
     if(i < N) X[i*INCX] = ALPHA;
 }
 
-__global__ void mask_kernel(int n, float *x, float mask_num, float *mask)
-{
-    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
-    if(i < n && mask[i] == mask_num) x[i] = mask_num;
-}
-
 __global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 {
     int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@@ -472,6 +469,35 @@ extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch,
     check_error(cudaPeekAtLastError());
 }
 
+__global__ void l2norm_kernel(int N, float *x, float *dx, int batch, int filters, int spatial)
+{
+    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    if (index >= N) return;
+    int b = index / spatial;
+    int i = index % spatial;
+    int f;
+    float sum = 0;
+    for(f = 0; f < filters; ++f){
+        int index = b*filters*spatial + f*spatial + i;
+        sum += powf(x[index], 2);
+    }
+    sum = sqrtf(sum);
+    if(sum == 0) sum = 1;
+    //printf("%f\n", sum);
+    for(f = 0; f < filters; ++f){
+        int index = b*filters*spatial + f*spatial + i;
+        x[index] /= sum;
+        dx[index] = (1 - x[index]) / sum;
+    }
+}
+
+extern "C" void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial)
+{
+    size_t N = batch*spatial;
+    l2norm_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, dx, batch, filters, spatial);
+    check_error(cudaPeekAtLastError());
+}
+
 __global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean)
 {
     const int threads = BLOCK;
@@ -621,6 +647,18 @@ extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride,
     check_error(cudaPeekAtLastError());
 }
 
+__global__ void mask_kernel(int n, float *x, float mask_num, float *mask, float val)
+{
+    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    if(i < n && mask[i] == mask_num) x[i] = val;
+}
+
+extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask, float val)
+{
+    mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask, val);
+    check_error(cudaPeekAtLastError());
+}
+
 __global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale)
 {
     int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@@ -633,12 +671,6 @@ extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, f
     check_error(cudaPeekAtLastError());
 }
 
-extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask)
-{
-    mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask);
-    check_error(cudaPeekAtLastError());
-}
-
 extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX)
 {
     const_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
     check_error(cudaPeekAtLastError());
 }
@@ -676,7 +708,7 @@ extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX)
     check_error(cudaPeekAtLastError());
 }
 
-__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
+__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
 {
     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
     if (id >= size) return;
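The adam_kernel hunk above rewrites the update with explicit bias-corrected moments. The standalone C check below (arbitrary sample numbers, not CUDA, not darknet defaults) shows the old and new expressions agree to printed precision and differ only in where eps enters; the new form is the textbook Adam step x += rate * mhat / (sqrt(vhat) + eps).

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        float m = 0.02f, v = 0.0005f;  /* running first/second moment estimates */
        float B1 = 0.9f, B2 = 0.999f, rate = 0.001f, eps = 1e-7f;
        int t = 10;                    /* update step */

        /* Old expression: bias correction folded into one scale factor. */
        float old_step = rate * sqrtf(1.f - powf(B2, t)) / (1.f - powf(B1, t))
                         * m / (sqrtf(v) + eps);

        /* New expression: explicit mhat/vhat, eps added to sqrt(vhat). */
        float mhat = m / (1.f - powf(B1, t));
        float vhat = v / (1.f - powf(B2, t));
        float new_step = rate * mhat / (sqrtf(vhat) + eps);

        printf("old: %.6f  new: %.6f\n", old_step, new_step);
        return 0;
    }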
@@ -690,10 +722,11 @@ __global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stri int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); - out[out_index] += add[add_index]; + out[out_index] = s1*out[out_index] + s2*add[add_index]; + //out[out_index] += add[add_index]; } -extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) +extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) { int minw = (w1 < w2) ? w1 : w2; int minh = (h1 < h2) ? h1 : h2; @@ -707,7 +740,7 @@ extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int if(sample < 1) sample = 1; int size = batch * minw * minh * minc; - shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); + shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, s1, s2, out); check_error(cudaPeekAtLastError()); } @@ -734,6 +767,40 @@ extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, fl check_error(cudaPeekAtLastError()); } +__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? -log(p) : 0; + delta[i] = t-p; + } +} + +extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + softmax_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void logistic_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p+.0000001) - (1-t)*log(1-p+.0000001); + delta[i] = t-p; + } +} + +extern "C" void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + logistic_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + __global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error) { int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; @@ -766,6 +833,21 @@ extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *er check_error(cudaPeekAtLastError()); } +__global__ void wgan_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + error[i] = truth[i] ? -pred[i] : pred[i]; + delta[i] = (truth[i] > 0) ? 
1 : -1; + } +} + +extern "C" void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + wgan_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + @@ -892,13 +974,13 @@ extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, f int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); /* - static int *tree_groups_size = 0; - static int *tree_groups_offset = 0; - if(!tree_groups_size){ - tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); - tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); - } - */ + static int *tree_groups_size = 0; + static int *tree_groups_offset = 0; + if(!tree_groups_size){ + tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); + } + */ int num = spatial*batch*hier.groups; softmax_tree_kernel<<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); check_error(cudaPeekAtLastError()); @@ -920,3 +1002,34 @@ extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, in softmax_kernel<<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); check_error(cudaPeekAtLastError()); } + + +__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int out_index = i; + int out_w = i%(w*stride); + i = i/(w*stride); + int out_h = i%(h*stride); + i = i/(h*stride); + int out_c = i%c; + i = i/c; + int b = i%batch; + + int in_w = out_w / stride; + int in_h = out_h / stride; + int in_c = out_c; + + int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; + + + if(forward) out[out_index] += scale * x[in_index]; + else atomicAdd(x+in_index, scale * out[out_index]); +} +extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t size = w*h*c*batch*stride*stride; + upsample_kernel<<>>(size, in, w, h, c, batch, stride, forward, scale, out); + check_error(cudaPeekAtLastError()); +} diff --git a/src/box.c b/src/box.c index 88ca71ac..f0a3504d 100644 --- a/src/box.c +++ b/src/box.c @@ -3,9 +3,83 @@ #include #include +int nms_comparator(const void *pa, const void *pb) +{ + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if(b.sort_class >= 0){ + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } else { + diff = a.objectness - b.objectness; + } + if(diff < 0) return 1; + else if(diff > 0) return -1; + return 0; +} + +void do_nms_obj(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + + for(i = 0; i < total; ++i){ + dets[i].sort_class = -1; + } + + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + if(dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].objectness = 0; + for(k = 0; k < classes; ++k){ + dets[j].prob[k] = 0; + } + } + } + } +} + + +void do_nms_sort(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; 
+ dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(k = 0; k < classes; ++k){ + for(i = 0; i < total; ++i){ + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].prob[k] = 0; + } + } + } + } +} + box float_to_box(float *f, int stride) { - box b; + box b = {0}; b.x = f[0]; b.y = f[1*stride]; b.w = f[2*stride]; @@ -230,79 +304,6 @@ dbox diou(box a, box b) return dd; } -typedef struct{ - int index; - int class; - float **probs; -} sortable_bbox; - -int nms_comparator(const void *pa, const void *pb) -{ - sortable_bbox a = *(sortable_bbox *)pa; - sortable_bbox b = *(sortable_bbox *)pb; - float diff = a.probs[a.index][b.class] - b.probs[b.index][b.class]; - if(diff < 0) return 1; - else if(diff > 0) return -1; - return 0; -} - -void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh) -{ - int i, j, k; - sortable_bbox *s = calloc(total, sizeof(sortable_bbox)); - - for(i = 0; i < total; ++i){ - s[i].index = i; - s[i].class = classes; - s[i].probs = probs; - } - - qsort(s, total, sizeof(sortable_bbox), nms_comparator); - for(i = 0; i < total; ++i){ - if(probs[s[i].index][classes] == 0) continue; - box a = boxes[s[i].index]; - for(j = i+1; j < total; ++j){ - box b = boxes[s[j].index]; - if (box_iou(a, b) > thresh){ - for(k = 0; k < classes+1; ++k){ - probs[s[j].index][k] = 0; - } - } - } - } - free(s); -} - - -void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh) -{ - int i, j, k; - sortable_bbox *s = calloc(total, sizeof(sortable_bbox)); - - for(i = 0; i < total; ++i){ - s[i].index = i; - s[i].class = 0; - s[i].probs = probs; - } - - for(k = 0; k < classes; ++k){ - for(i = 0; i < total; ++i){ - s[i].class = k; - } - qsort(s, total, sizeof(sortable_bbox), nms_comparator); - for(i = 0; i < total; ++i){ - if(probs[s[i].index][k] == 0) continue; - box a = boxes[s[i].index]; - for(j = i+1; j < total; ++j){ - box b = boxes[s[j].index]; - if (box_iou(a, b) > thresh){ - probs[s[j].index][k] = 0; - } - } - } - } - free(s); -} void do_nms(box *boxes, float **probs, int total, int classes, float thresh) { diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 56043e78..8fa2ab2e 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -314,6 +314,9 @@ void update_convolutional_layer_gpu(layer l, update_args a) scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); } } + if(l.clip){ + constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1); + } } diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index f197bcfe..e4fb9bde 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -203,6 +203,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int // float scale = 1./sqrt(size*size*c); float scale = sqrt(2./(size*size*c/l.groups)); + //printf("convscale %f\n", scale); //scale = .02; //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1); for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal(); @@ -321,7 +322,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int l.workspace_size = get_workspace_size(l); l.activation = activation; - fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, 
l.out_c); + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.); return l; } diff --git a/src/cost_layer.c b/src/cost_layer.c index 7ef1094b..2138ff26 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -14,6 +14,7 @@ COST_TYPE get_cost_type(char *s) if (strcmp(s, "masked")==0) return MASKED; if (strcmp(s, "smooth")==0) return SMOOTH; if (strcmp(s, "L1")==0) return L1; + if (strcmp(s, "wgan")==0) return WGAN; fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); return SSE; } @@ -31,6 +32,8 @@ char *get_cost_string(COST_TYPE a) return "smooth"; case L1: return "L1"; + case WGAN: + return "wgan"; } return "sse"; } @@ -123,19 +126,18 @@ int float_abs_compare (const void * a, const void * b) void forward_cost_layer_gpu(cost_layer l, network net) { - if (!net.truth_gpu) return; + if (!net.truth) return; if(l.smooth){ scal_gpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1); add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1); } - if (l.cost_type == MASKED) { - mask_gpu(l.batch*l.inputs, net.input_gpu, SECRET_NUM, net.truth_gpu); - } if(l.cost_type == SMOOTH){ smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); } else if (l.cost_type == L1){ l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == WGAN){ + wgan_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); } else { l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); } @@ -144,6 +146,9 @@ void forward_cost_layer_gpu(cost_layer l, network net) scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale); scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale); } + if (l.cost_type == MASKED) { + mask_gpu(l.batch*l.inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + } if(l.ratio){ cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); diff --git a/src/data.c b/src/data.c index 935e6389..51900f26 100644 --- a/src/data.c +++ b/src/data.c @@ -230,7 +230,7 @@ void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, int id; int i; - for (i = 0; i < count && i < 30; ++i) { + for (i = 0; i < count && i < 90; ++i) { x = boxes[i].x; y = boxes[i].y; w = boxes[i].w; @@ -424,6 +424,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, float x,y,w,h; int id; int i; + int sub = 0; for (i = 0; i < count; ++i) { x = boxes[i].x; @@ -432,13 +433,16 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, h = boxes[i].h; id = boxes[i].id; - if ((w < .001 || h < .001)) continue; + if ((w < .001 || h < .001)) { + ++sub; + continue; + } - truth[i*5+0] = x; - truth[i*5+1] = y; - truth[i*5+2] = w; - truth[i*5+3] = h; - truth[i*5+4] = id; + truth[(i-sub)*5+0] = x; + truth[(i-sub)*5+1] = y; + truth[(i-sub)*5+2] = w; + truth[(i-sub)*5+3] = h; + truth[(i-sub)*5+4] = id; } free(boxes); } @@ -506,6 +510,7 @@ void fill_truth(char *path, char **labels, int k, float *truth) if(strstr(path, labels[i])){ truth[i] = 1; ++count; + //printf("%s %s %d\n", path, labels[i], i); } } if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path); @@ -543,19 +548,31 @@ void fill_hierarchy(float *truth, int k, tree *hierarchy) } } -matrix 
load_regression_labels_paths(char **paths, int n) +matrix load_regression_labels_paths(char **paths, int n, int k) { - matrix y = make_matrix(n, 1); - int i; + matrix y = make_matrix(n, k); + int i,j; for(i = 0; i < n; ++i){ char labelpath[4096]; - find_replace(paths[i], "images", "targets", labelpath); - find_replace(labelpath, "JPEGImages", "targets", labelpath); + find_replace(paths[i], "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".BMP", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPeG", ".txt", labelpath); + find_replace(labelpath, ".Jpeg", ".txt", labelpath); + find_replace(labelpath, ".PNG", ".txt", labelpath); + find_replace(labelpath, ".TIF", ".txt", labelpath); + find_replace(labelpath, ".bmp", ".txt", labelpath); + find_replace(labelpath, ".jpeg", ".txt", labelpath); find_replace(labelpath, ".jpg", ".txt", labelpath); find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".tif", ".txt", labelpath); FILE *file = fopen(labelpath, "r"); - fscanf(file, "%f", &(y.vals[i][0])); + for(j = 0; j < k; ++j){ + fscanf(file, "%f", &(y.vals[i][j])); + } fclose(file); } return y; @@ -578,18 +595,14 @@ matrix load_tags_paths(char **paths, int n, int k) { matrix y = make_matrix(n, k); int i; - int count = 0; + //int count = 0; for(i = 0; i < n; ++i){ char label[4096]; - find_replace(paths[i], "imgs", "labels", label); - find_replace(label, "_iconl.jpeg", ".txt", label); + find_replace(paths[i], "images", "labels", label); + find_replace(label, ".jpg", ".txt", label); FILE *file = fopen(label, "r"); - if(!file){ - find_replace(label, "labels", "labels2", label); - file = fopen(label, "r"); - if(!file) continue; - } - ++count; + if (!file) continue; + //++count; int tag; while(fscanf(file, "%d", &tag) == 1){ if(tag < k){ @@ -598,7 +611,7 @@ matrix load_tags_paths(char **paths, int n, int k) } fclose(file); } - printf("%d/%d\n", count, n); + //printf("%d/%d\n", count, n); return y; } @@ -906,7 +919,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter) d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.cols = h*w*3; - int k = (4+classes)*30; + int k = (4+classes)*90; d.y = make_matrix(1, k); int dw = w*jitter; @@ -1005,7 +1018,7 @@ void *load_thread(void *ptr) if (a.type == OLD_CLASSIFICATION_DATA){ *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); } else if (a.type == REGRESSION_DATA){ - *a.d = load_data_regression(a.paths, a.n, a.m, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); } else if (a.type == CLASSIFICATION_DATA){ *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center); } else if (a.type == SUPER_DATA){ @@ -1161,13 +1174,13 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale) return d; } -data load_data_regression(char **paths, int n, int m, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +data load_data_regression(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) { if(m) paths = get_random_paths(paths, n, m); data d = 
{0}; d.shallow = 0; d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); - d.y = load_regression_labels_paths(paths, n); + d.y = load_regression_labels_paths(paths, n, k); if(m) free(paths); return d; } diff --git a/src/data.h b/src/data.h index 931b99b2..781906f8 100644 --- a/src/data.h +++ b/src/data.h @@ -29,7 +29,7 @@ data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); data load_data_super(char **paths, int n, int m, int w, int h, int scale); data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); -data load_data_regression(char **paths, int n, int m, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); data load_go(char *filename); diff --git a/src/deconvolutional_kernels.cu b/src/deconvolutional_kernels.cu index d7e29462..8267dcfa 100644 --- a/src/deconvolutional_kernels.cu +++ b/src/deconvolutional_kernels.cu @@ -45,7 +45,7 @@ extern "C" void backward_deconvolutional_layer_gpu(layer l, network net) { int i; - constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); if(l.batch_normalize){ @@ -116,18 +116,16 @@ void update_deconvolutional_layer_gpu(layer l, update_args a) float decay = a.decay; int batch = a.batch; - int size = l.size*l.size*l.c*l.n; - if(a.adam){ - adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, size, batch, a.t); + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); if(l.scales_gpu){ adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); } }else{ - axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); - axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); - scal_gpu(size, momentum, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); diff --git a/src/deconvolutional_layer.c b/src/deconvolutional_layer.c index 674ce6b3..00c0e857 100644 --- a/src/deconvolutional_layer.c +++ b/src/deconvolutional_layer.c @@ -15,6 +15,22 @@ static size_t get_workspace_size(layer l){ return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float); } +void bilinear_init(layer l) +{ + int i,j,f; + float center = (l.size-1) / 2.; + for(f = 0; f < l.n; ++f){ + for(j = 0; j < l.size; ++j){ + for(i = 0; i < l.size; ++i){ + float val = (1 - fabs(i 
- center)) * (1 - fabs(j - center)); + int c = f%l.c; + int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i; + l.weights[ind] = val; + } + } + } +} + layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam) { @@ -38,8 +54,11 @@ layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size l.biases = calloc(n, sizeof(float)); l.bias_updates = calloc(n, sizeof(float)); + //float scale = n/(size*size*c); + //printf("scale: %f\n", scale); float scale = .02; for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); + //bilinear_init(l); for(i = 0; i < n; ++i){ l.biases[i] = 0; } @@ -51,6 +70,8 @@ layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size l.outputs = l.out_w * l.out_h * l.out_c; l.inputs = l.w * l.h * l.c; + scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1); + l.output = calloc(l.batch*l.outputs, sizeof(float)); l.delta = calloc(l.batch*l.outputs, sizeof(float)); @@ -121,7 +142,7 @@ layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size l.mean_delta_gpu = cuda_make_array(0, n); l.variance_delta_gpu = cuda_make_array(0, n); - l.scales_gpu = cuda_make_array(0, n); + l.scales_gpu = cuda_make_array(l.scales, n); l.scale_updates_gpu = cuda_make_array(0, n); l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); diff --git a/src/demo.c b/src/demo.c index ec73abd5..68294b43 100644 --- a/src/demo.c +++ b/src/demo.c @@ -17,8 +17,6 @@ static char **demo_names; static image **demo_alphabet; static int demo_classes; -static float **probs; -static box *boxes; static network *net; static image buff [3]; static image buff_letter[3]; @@ -31,13 +29,19 @@ static float demo_hier = .5; static int running = 0; static int demo_frame = 3; -static int demo_detections = 0; -static float **predictions; static int demo_index = 0; +static int demo_detections = 0; +//static float **predictions; +static detection **dets; +static detection *avg; +//static float *avg; static int demo_done = 0; -static float *avg; double demo_time; +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative); +detection *make_network_boxes(network *net); +void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets); + void *detect_in_thread(void *ptr) { running = 1; @@ -45,26 +49,45 @@ void *detect_in_thread(void *ptr) layer l = net->layers[net->n-1]; float *X = buff_letter[(buff_index+2)%3].data; - float *prediction = network_predict(net, X); + network_predict(net, X); - memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float)); - mean_arrays(predictions, demo_frame, l.outputs, avg); - l.output = avg; + /* if(l.type == DETECTION){ get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); - } else if (l.type == REGION){ - get_region_boxes(l, buff[0].w, buff[0].h, net->w, net->h, demo_thresh, probs, boxes, 0, 0, 0, demo_hier, 1); + } else */ + if (l.type == REGION){ + fill_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, dets[demo_index]); } else { error("Last layer must produce detections\n"); } - if (nms > 0) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + + int i,j; + box zero = {0}; + int classes = l.classes; + for(i = 0; i < demo_detections; ++i){ + avg[i].objectness = 0; + avg[i].bbox = zero; + memset(avg[i].prob, 0, classes*sizeof(float)); + for(j = 0; j < 
demo_frame; ++j){ + axpy_cpu(classes, 1./demo_frame, dets[j][i].prob, 1, avg[i].prob, 1); + avg[i].objectness += dets[j][i].objectness * 1./demo_frame; + avg[i].bbox.x += dets[j][i].bbox.x * 1./demo_frame; + avg[i].bbox.y += dets[j][i].bbox.y * 1./demo_frame; + avg[i].bbox.w += dets[j][i].bbox.w * 1./demo_frame; + avg[i].bbox.h += dets[j][i].bbox.h * 1./demo_frame; + } + //copy_cpu(classes, dets[0][i].prob, 1, avg[i].prob, 1); + //avg[i].objectness = dets[0][i].objectness; + } + + if (nms > 0) do_nms_obj(avg, demo_detections, l.classes, nms); printf("\033[2J"); printf("\033[1;1H"); printf("\nFPS:%.1f\n",fps); printf("Objects:\n\n"); image display = buff[(buff_index+2) % 3]; - draw_detections(display, demo_detections, demo_thresh, boxes, probs, 0, demo_names, demo_alphabet, demo_classes); + draw_detections(display, avg, demo_detections, demo_thresh, demo_names, demo_alphabet, demo_classes); demo_index = (demo_index + 1)%demo_frame; running = 0; @@ -117,8 +140,7 @@ void *detect_loop(void *ptr) void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) { - demo_frame = avg_frames; - predictions = calloc(demo_frame, sizeof(float*)); + //demo_frame = avg_frames; image **alphabet = load_alphabet(); demo_names = names; demo_alphabet = alphabet; @@ -152,16 +174,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch if(!cap) error("Couldn't connect to webcam.\n"); - layer l = net->layers[net->n-1]; - demo_detections = l.n*l.w*l.h; - int j; - - avg = (float *) calloc(l.outputs, sizeof(float)); - for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); - - boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); - probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); - for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float)); + demo_detections = num_boxes(net); + avg = make_network_boxes(net); + dets = calloc(demo_frame, sizeof(detection*)); + int i; + for(i = 0; i < demo_frame; ++i) dets[i] = make_network_boxes(net); buff[0] = get_image_from_stream(cap); buff[1] = copy_image(buff[0]); @@ -203,6 +220,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch } } +/* void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) { demo_frame = avg_frames; @@ -290,6 +308,7 @@ void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float th ++count; } } +*/ #else void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) { diff --git a/src/detection_layer.c b/src/detection_layer.c index 5c8a1cea..015ee317 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -222,7 +222,7 @@ void backward_detection_layer(const detection_layer l, network net) axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); } -void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness) +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) { int i,j,n; float *predictions = l.output; @@ -235,17 +235,17 @@ void 
get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box int p_index = l.side*l.side*l.classes + i*l.n + n; float scale = predictions[p_index]; int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; - boxes[index].x = (predictions[box_index + 0] + col) / l.side * w; - boxes[index].y = (predictions[box_index + 1] + row) / l.side * h; - boxes[index].w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; - boxes[index].h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + box b; + b.x = (predictions[box_index + 0] + col) / l.side * w; + b.y = (predictions[box_index + 1] + row) / l.side * h; + b.w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; + b.h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; for(j = 0; j < l.classes; ++j){ int class_index = i*l.classes; float prob = scale*predictions[class_index+j]; - probs[index][j] = (prob > thresh) ? prob : 0; - } - if(only_objectness){ - probs[index][0] = scale; + dets[index].prob[j] = (prob > thresh) ? prob : 0; } } } diff --git a/src/image.c b/src/image.c index ac1b6296..b78d4020 100644 --- a/src/image.c +++ b/src/image.c @@ -131,6 +131,7 @@ image tile_images(image a, image b, int dx) image get_label(image **characters, char *string, int size) { + size = size/10; if(size > 7) size = 7; image label = make_empty_image(0,0,0); while(*string){ @@ -235,7 +236,7 @@ image **load_alphabet() return alphabets; } -void draw_detections(image im, int num, float thresh, box *boxes, float **probs, float **masks, char **names, image **alphabet, int classes) +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) { int i,j; @@ -243,7 +244,7 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char labelstr[4096] = {0}; int class = -1; for(j = 0; j < classes; ++j){ - if (probs[i][j] > thresh){ + if (dets[i].prob[j] > thresh){ if (class < 0) { strcat(labelstr, names[j]); class = j; @@ -251,7 +252,7 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, strcat(labelstr, ", "); strcat(labelstr, names[j]); } - printf("%s: %.0f%%\n", names[j], probs[i][j]*100); + printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100); } } if(class >= 0){ @@ -276,7 +277,8 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, rgb[0] = red; rgb[1] = green; rgb[2] = blue; - box b = boxes[i]; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); int left = (b.x-b.w/2.)*im.w; int right = (b.x+b.w/2.)*im.w; @@ -290,12 +292,12 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, draw_box_width(im, left, top, right, bot, width, red, green, blue); if (alphabet) { - image label = get_label(alphabet, labelstr, (im.h*.03)/10); + image label = get_label(alphabet, labelstr, (im.h*.03)); draw_label(im, top + width, left, label, rgb); free_image(label); } - if (masks){ - image mask = float_to_image(14, 14, 1, masks[i]); + if (dets[i].mask){ + image mask = float_to_image(14, 14, 1, dets[i].mask); image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); image tmask = threshold_image(resized_mask, .5); embed_image(tmask, im, left, top); @@ -394,6 +396,35 @@ void ghost_image(image source, image dest, int dx, int dy) } } +void blocky_image(image im, int s) +{ + int i,j,k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + 
im.w*(j/s*s + im.h*k)]; + } + } + } +} + +void censor_image(image im, int dx, int dy, int w, int h) +{ + int i,j,k; + int s = 32; + if(dx < 0) dx = 0; + if(dy < 0) dy = 0; + + for(k = 0; k < im.c; ++k){ + for(j = dy; j < dy + h && j < im.h; ++j){ + for(i = dx; i < dx + w && i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + //im.data[i + j*im.w + k*im.w*im.h] = 0; + } + } + } +} + void embed_image(image source, image dest, int dx, int dy) { int x,y,k; diff --git a/src/image.h b/src/image.h index 4ff0eacb..789cf18d 100644 --- a/src/image.h +++ b/src/image.h @@ -22,12 +22,10 @@ void show_image_cv(image p, const char *name, IplImage *disp); float get_color(int c, int x, int max); void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); void draw_bbox(image a, box bbox, int w, float r, float g, float b); -void draw_label(image a, int r, int c, image label, const float *rgb); void write_label(image a, int r, int c, image *characters, char *string, float *rgb); image image_distance(image a, image b); void scale_image(image m, float s); image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); -image center_crop_image(image im, int w, int h); image random_crop_image(image im, int w, int h); image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); diff --git a/src/l2norm_layer.c b/src/l2norm_layer.c new file mode 100644 index 00000000..d099479b --- /dev/null +++ b/src/l2norm_layer.c @@ -0,0 +1,63 @@ +#include "l2norm_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda.h" + +#include +#include +#include +#include +#include + +layer make_l2norm_layer(int batch, int inputs) +{ + fprintf(stderr, "l2norm %4d\n", inputs); + layer l = {0}; + l.type = L2NORM; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.output = calloc(inputs*batch, sizeof(float)); + l.scales = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + + l.forward = forward_l2norm_layer; + l.backward = backward_l2norm_layer; + #ifdef GPU + l.forward_gpu = forward_l2norm_layer_gpu; + l.backward_gpu = backward_l2norm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.scales_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_l2norm_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer(const layer l, network net) +{ + //axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_l2norm_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + l2normalize_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/src/l2norm_layer.h b/src/l2norm_layer.h new file mode 100644 index 00000000..1ca6f710 --- /dev/null +++ b/src/l2norm_layer.h @@ -0,0 +1,15 @@ +#ifndef L2NORM_LAYER_H +#define 
L2NORM_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_l2norm_layer(int batch, int inputs); +void forward_l2norm_layer(const layer l, network net); +void backward_l2norm_layer(const layer l, network net); + +#ifdef GPU +void forward_l2norm_layer_gpu(const layer l, network net); +void backward_l2norm_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/src/logistic_layer.c b/src/logistic_layer.c new file mode 100644 index 00000000..b2b3d6b1 --- /dev/null +++ b/src/logistic_layer.c @@ -0,0 +1,71 @@ +#include "logistic_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda.h" + +#include +#include +#include +#include +#include + +layer make_logistic_layer(int batch, int inputs) +{ + fprintf(stderr, "logistic x entropy %4d\n", inputs); + layer l = {0}; + l.type = LOGXENT; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_logistic_layer; + l.backward = backward_logistic_layer; + #ifdef GPU + l.forward_gpu = forward_logistic_layer_gpu; + l.backward_gpu = backward_logistic_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_logistic_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer(const layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_logistic_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/src/logistic_layer.h b/src/logistic_layer.h new file mode 100644 index 00000000..9c25bee3 --- /dev/null +++ b/src/logistic_layer.h @@ -0,0 +1,15 @@ +#ifndef LOGISTIC_LAYER_H +#define LOGISTIC_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_logistic_layer(int batch, int inputs); +void forward_logistic_layer(const layer l, network net); +void backward_logistic_layer(const layer l, network net); + +#ifdef GPU +void forward_logistic_layer_gpu(const layer l, network net); +void backward_logistic_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/src/network.c b/src/network.c index 1b4df6bc..f4966211 100644 --- a/src/network.c +++ b/src/network.c @@ -26,6 +26,7 @@ #include "softmax_layer.h" #include "dropout_layer.h" #include "route_layer.h" +#include "upsample_layer.h" #include "shortcut_layer.h" #include "parser.h" #include "data.h" @@ -377,6 +378,10 @@ int resize_network(network *net, int w, int h) resize_region_layer(&l, w, h); }else if(l.type == ROUTE){ resize_route_layer(&l, net); + }else if(l.type 
== SHORTCUT){ + resize_shortcut_layer(&l, w, h); + }else if(l.type == UPSAMPLE){ + resize_upsample_layer(&l, w, h); }else if(l.type == REORG){ resize_reorg_layer(&l, w, h); }else if(l.type == AVGPOOL){ @@ -412,7 +417,9 @@ int resize_network(network *net, int w, int h) cuda_free(net->truth_gpu); net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); - net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + if(workspace_size){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + } }else { free(net->workspace); net->workspace = calloc(1, workspace_size); @@ -497,34 +504,62 @@ float *network_predict(network *net, float *input) int num_boxes(network *net) { - layer l = net->layers[net->n-1]; - return l.w*l.h*l.n; + int i; + int s = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == REGION || l.type == DETECTION){ + s += l.w*l.h*l.n; + } + } + return s; } -box *make_boxes(network *net) +detection *make_network_boxes(network *net) { - layer l = net->layers[net->n-1]; - box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); - return boxes; + layer l = net->layers[net->n - 1]; + int i; + int nboxes = num_boxes(net); + detection *dets = calloc(nboxes, sizeof(detection)); + for(i = 0; i < nboxes; ++i){ + dets[i].prob = calloc(l.classes, sizeof(float)); + if(l.coords > 4){ + dets[i].mask = calloc(l.coords-4, sizeof(float)); + } + } + return dets; } - -float **make_probs(network *net) +void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets) { int j; - layer l = net->layers[net->n-1]; - float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); - for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *)); - return probs; + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + if(l.type == REGION){ + get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); + dets += l.w*l.h*l.n; + } + if(l.type == DETECTION){ + get_detection_detections(l, w, h, thresh, dets); + dets += l.w*l.h*l.n; + } + } } -void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, box *boxes, float **probs) +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative) { - network_predict_image(net, im); - layer l = net->layers[net->n-1]; - if(l.type == REGION){ - get_region_boxes(l, im.w, im.h, net->w, net->h, thresh, probs, boxes, 0, 0, 0, hier_thresh, 0); - if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); + detection *dets = make_network_boxes(net); + fill_network_boxes(net, w, h, thresh, hier, map, relative, dets); + return dets; +} + +void free_detections(detection *dets, int n) +{ + int i; + for(i = 0; i < n; ++i){ + free(dets[i].prob); + if(dets[i].mask) free(dets[i].mask); } + free(dets); } float *network_predict_image(network *net, image im) diff --git a/src/parser.c b/src/parser.c index da7487b0..e6dd2380 100644 --- a/src/parser.c +++ b/src/parser.c @@ -4,6 +4,8 @@ #include #include "activation_layer.h" +#include "logistic_layer.h" +#include "l2norm_layer.h" #include "activations.h" #include "avgpool_layer.h" #include "batchnorm_layer.h" @@ -27,6 +29,7 @@ #include "reorg_layer.h" #include "rnn_layer.h" #include "route_layer.h" +#include "upsample_layer.h" #include "shortcut_layer.h" #include "softmax_layer.h" #include "lstm_layer.h" @@ -53,6 +56,8 @@ LAYER_TYPE 
string_to_layer_type(char * type) if (strcmp(type, "[deconv]")==0 || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL; if (strcmp(type, "[activation]")==0) return ACTIVE; + if (strcmp(type, "[logistic]")==0) return LOGXENT; + if (strcmp(type, "[l2norm]")==0) return L2NORM; if (strcmp(type, "[net]")==0 || strcmp(type, "[network]")==0) return NETWORK; if (strcmp(type, "[crnn]")==0) return CRNN; @@ -73,6 +78,7 @@ LAYER_TYPE string_to_layer_type(char * type) if (strcmp(type, "[soft]")==0 || strcmp(type, "[softmax]")==0) return SOFTMAX; if (strcmp(type, "[route]")==0) return ROUTE; + if (strcmp(type, "[upsample]")==0) return UPSAMPLE; return BLANK; } @@ -275,9 +281,27 @@ layer parse_region(list *options, size_params params) { int coords = option_find_int(options, "coords", 4); int classes = option_find_int(options, "classes", 20); - int num = option_find_int(options, "num", 1); + int total = option_find_int(options, "num", 1); + int num = total; - layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords); + char *a = option_find_str(options, "mask", 0); + int *mask = 0; + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',')+1; + } + num = n; + } + layer l = make_region_layer(params.batch, params.w, params.h, num, total, mask, classes, coords); assert(l.outputs == params.inputs); l.log = option_find_int_quiet(options, "log", 0); @@ -285,11 +309,12 @@ layer parse_region(list *options, size_params params) l.softmax = option_find_int(options, "softmax", 0); l.background = option_find_int_quiet(options, "background", 0); - l.max_boxes = option_find_int_quiet(options, "max",30); + l.max_boxes = option_find_int_quiet(options, "max",90); l.jitter = option_find_float(options, "jitter", .2); l.rescore = option_find_int_quiet(options, "rescore",0); - l.thresh = option_find_float(options, "thresh", .5); + l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l.truth_thresh = option_find_float(options, "truth_thresh", 1); l.classfix = option_find_int_quiet(options, "classfix", 0); l.absolute = option_find_int_quiet(options, "absolute", 0); l.random = option_find_int_quiet(options, "random", 0); @@ -297,16 +322,17 @@ layer parse_region(list *options, size_params params) l.coord_scale = option_find_float(options, "coord_scale", 1); l.object_scale = option_find_float(options, "object_scale", 1); l.noobject_scale = option_find_float(options, "noobject_scale", 1); - l.mask_scale = option_find_float(options, "mask_scale", 1); + l.mask_scale = option_find_float_quiet(options, "mask_scale", 1); l.class_scale = option_find_float(options, "class_scale", 1); l.bias_match = option_find_int_quiet(options, "bias_match",0); + l.focus = option_find_float_quiet(options, "focus", 0); char *tree_file = option_find_str(options, "tree", 0); if (tree_file) l.softmax_tree = read_tree(tree_file); char *map_file = option_find_str(options, "map", 0); if (map_file) l.map = read_map(map_file); - char *a = option_find_str(options, "anchors", 0); + a = option_find_str(options, "anchors", 0); if(a){ int len = strlen(a); int n = 1; @@ -334,7 +360,7 @@ detection_layer parse_detection(list *options, size_params params) layer.softmax = option_find_int(options, "softmax", 0); layer.sqrt = option_find_int(options, "sqrt", 0); - layer.max_boxes = option_find_int_quiet(options, "max",30); + layer.max_boxes = 
option_find_int_quiet(options, "max",90); layer.coord_scale = option_find_float(options, "coord_scale", 1); layer.forced = option_find_int(options, "forced", 0); layer.object_scale = option_find_float(options, "object_scale", 1); @@ -470,10 +496,31 @@ layer parse_shortcut(list *options, size_params params, network *net) char *activation_s = option_find_str(options, "activation", "linear"); ACTIVATION activation = get_activation(activation_s); s.activation = activation; + s.alpha = option_find_float_quiet(options, "alpha", 1); + s.beta = option_find_float_quiet(options, "beta", 1); return s; } +layer parse_l2norm(list *options, size_params params) +{ + layer l = make_l2norm_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + + +layer parse_logistic(list *options, size_params params) +{ + layer l = make_logistic_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + layer parse_activation(list *options, size_params params) { char *activation_s = option_find_str(options, "activation", "linear"); @@ -481,16 +528,22 @@ layer parse_activation(list *options, size_params params) layer l = make_activation_layer(params.batch, params.inputs, activation); - l.out_h = params.h; - l.out_w = params.w; - l.out_c = params.c; - l.h = params.h; - l.w = params.w; - l.c = params.c; + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; return l; } +layer parse_upsample(list *options, size_params params, network *net) +{ + + int stride = option_find_int(options, "stride",2); + layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l.scale = option_find_float_quiet(options, "scale", 1); + return l; +} + route_layer parse_route(list *options, size_params params, network *net) { char *l = option_find(options, "layers"); @@ -575,6 +628,7 @@ void parse_net_options(list *options, network *net) net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w); net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w); net->center = option_find_int_quiet(options, "center",0); + net->clip = option_find_float_quiet(options, "clip", 0); net->angle = option_find_float_quiet(options, "angle", 0); net->aspect = option_find_float_quiet(options, "aspect", 1); @@ -673,6 +727,10 @@ network *parse_network_cfg(char *filename) l = parse_local(options, params); }else if(lt == ACTIVE){ l = parse_activation(options, params); + }else if(lt == LOGXENT){ + l = parse_logistic(options, params); + }else if(lt == L2NORM){ + l = parse_l2norm(options, params); }else if(lt == RNN){ l = parse_rnn(options, params); }else if(lt == GRU){ @@ -706,6 +764,8 @@ network *parse_network_cfg(char *filename) l = parse_avgpool(options, params); }else if(lt == ROUTE){ l = parse_route(options, params, net); + }else if(lt == UPSAMPLE){ + l = parse_upsample(options, params, net); }else if(lt == SHORTCUT){ l = parse_shortcut(options, params, net); }else if(lt == DROPOUT){ @@ -719,9 +779,11 @@ network *parse_network_cfg(char *filename) }else{ fprintf(stderr, "Type not recognized: %s\n", s->type); } + l.clip = net->clip; l.truth = option_find_int_quiet(options, "truth", 0); l.onlyforward = option_find_int_quiet(options, "onlyforward", 0); l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l.dontsave = option_find_int_quiet(options, "dontsave", 0); l.dontload = 
option_find_int_quiet(options, "dontload", 0); l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); @@ -905,6 +967,7 @@ void save_weights_upto(network *net, char *filename, int cutoff) int i; for(i = 0; i < net->n && i < cutoff; ++i){ layer l = net->layers[i]; + if (l.dontsave) continue; if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ save_convolutional_weights(l, fp); } if(l.type == CONNECTED){ diff --git a/src/region_layer.c b/src/region_layer.c index 449957c3..5ad7bc0a 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -10,12 +10,14 @@ #include #include -layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) +layer make_region_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int coords) { + int i; layer l = {0}; l.type = REGION; l.n = n; + l.total = total; l.batch = batch; l.h = h; l.w = w; @@ -26,15 +28,21 @@ layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) l.classes = classes; l.coords = coords; l.cost = calloc(1, sizeof(float)); - l.biases = calloc(n*2, sizeof(float)); + l.biases = calloc(total*2, sizeof(float)); + if(mask) l.mask = mask; + else{ + l.mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l.mask[i] = i; + } + } l.bias_updates = calloc(n*2, sizeof(float)); l.outputs = h*w*n*(classes + coords + 1); l.inputs = l.outputs; - l.truths = 30*(l.coords + 1); + l.truths = 90*(l.coords + 1); l.delta = calloc(batch*l.outputs, sizeof(float)); l.output = calloc(batch*l.outputs, sizeof(float)); - int i; - for(i = 0; i < n*2; ++i){ + for(i = 0; i < total*2; ++i){ l.biases[i] = .5; } @@ -73,30 +81,37 @@ void resize_region_layer(layer *l, int w, int h) #endif } -box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) +box get_region_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) { box b; - b.x = (i + x[index + 0*stride]) / w; - b.y = (j + x[index + 1*stride]) / h; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 1*stride]) / lh; b.w = exp(x[index + 2*stride]) * biases[2*n] / w; b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; return b; } -float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride) +float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) { - box pred = get_region_box(x, biases, n, index, i, j, w, h, stride); + box pred = get_region_box(x, biases, n, index, i, j, lw, lh, w, h, stride); float iou = box_iou(pred, truth); - float tx = (truth.x*w - i); - float ty = (truth.y*h - j); + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); float tw = log(truth.w*w / biases[2*n]); float th = log(truth.h*h / biases[2*n + 1]); + //printf("%f %f %f %f\n", tx, ty, tw, th); + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + //printf("x: %f %f\n",tx , x[index + 0*stride]); + //printf("y: %f %f\n",ty , x[index + 1*stride]); + //printf("w: %f %f\n",tw , x[index + 2*stride]); + //printf("h: %f %f\n\n",th , x[index + 3*stride]); + //printf("%f %f %f %f\n", x[index + 0*stride], x[index + 
1*stride], x[index + 2*stride], x[index + 3*stride]); return iou; } @@ -109,7 +124,7 @@ void delta_region_mask(float *truth, float *x, int n, int index, float *delta, i } -void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag) +void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag, float focus) { int i, n; if(hier){ @@ -125,15 +140,30 @@ void delta_region_class(float *output, float *delta, int index, int class, int c class = hier->parent[class]; } - *avg_cat += pred; + if(avg_cat) *avg_cat += pred; } else { if (delta[index] && tag){ - delta[index + stride*class] = scale * (1 - output[index + stride*class]); + if(focus){ + float y = -1; + float p = output[index + stride*class]; + float lg = p > .0000000001 ? log(p) : -10; + delta[index + stride*class] = y * pow(1-p, focus) * (focus*p*lg + p - 1); + }else{ + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + if(avg_cat) *avg_cat += output[index + stride*class]; + } return; } for(n = 0; n < classes; ++n){ - delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]); - if(n == class) *avg_cat += output[index + stride*n]; + if(focus){ + float y = (n == class) ? -1 : 1; + float p = (n == class) ? output[index + stride*n] : 1 - output[index + stride*n]; + float lg = p > .0000000001 ? log(p) : -10; + delta[index + stride*n] = y * pow(1-p, focus) * (focus*p*lg + p - 1); + }else{ + delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]); + } + if(n == class && avg_cat) *avg_cat += output[index + stride*n]; } } } @@ -189,6 +219,7 @@ void forward_region_layer(const layer l, network net) if(!net.train) return; float avg_iou = 0; float recall = 0; + float recall75 = 0; float avg_cat = 0; float avg_obj = 0; float avg_anyobj = 0; @@ -198,7 +229,7 @@ void forward_region_layer(const layer l, network net) for (b = 0; b < l.batch; ++b) { if(l.softmax_tree){ int onlyclass = 0; - for(t = 0; t < 30; ++t){ + for(t = 0; t < l.max_boxes; ++t){ box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); if(!truth.x) break; int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; @@ -218,7 +249,7 @@ void forward_region_layer(const layer l, network net) } int class_index = entry_index(l, b, maxi, l.coords + 1); int obj_index = entry_index(l, b, maxi, l.coords); - delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax, l.focus); if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]); else l.delta[obj_index] = 0; l.delta[obj_index] = 0; @@ -233,36 +264,50 @@ void forward_region_layer(const layer l, network net) for (i = 0; i < l.w; ++i) { for (n = 0; n < l.n; ++n) { int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); - box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + box pred = get_region_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); float best_iou = 0; - for(t = 0; t < 30; ++t){ + int best_t = 0; + for(t = 0; t < l.max_boxes; ++t){ box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); if(!truth.x) break; float iou = box_iou(pred, truth); 
if (iou > best_iou) { best_iou = iou; + best_t = t; } } int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords); avg_anyobj += l.output[obj_index]; l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]); - if (best_iou > l.thresh) { + if (best_iou > l.ignore_thresh) { l.delta[obj_index] = 0; } + if (best_iou > l.truth_thresh) { + l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); - if(*(net.seen) < 12800){ - box truth = {0}; - truth.x = (i + .5)/l.w; - truth.y = (j + .5)/l.h; - truth.w = l.biases[2*n]/l.w; - truth.h = l.biases[2*n+1]/l.h; - delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h); + int class = net.truth[best_t*(l.coords + 1) + b*l.truths + l.coords]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords + 1); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, 0, !l.softmax, l.focus); + box truth = float_to_box(net.truth + best_t*(l.coords + 1) + b*l.truths, 1); + delta_region_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, l.coord_scale*(2-truth.w*truth.h), l.w*l.h); } + + /* + if(*(net.seen) < 12800){ + box truth = {0}; + truth.x = (i + .5)/l.w; + truth.y = (j + .5)/l.h; + truth.w = l.biases[2*l.mask[n]]/net.w; + truth.h = l.biases[2*l.mask[n]+1]/net.h; + delta_region_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, .01, l.w*l.h); + } + */ } } } - for(t = 0; t < 30; ++t){ + for(t = 0; t < l.max_boxes; ++t){ box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); if(!truth.x) break; @@ -275,16 +320,11 @@ void forward_region_layer(const layer l, network net) truth_shift.x = 0; truth_shift.y = 0; //printf("index %d %d\n",i, j); - for(n = 0; n < l.n; ++n){ - int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); - box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); - if(l.bias_match){ - pred.w = l.biases[2*n]/l.w; - pred.h = l.biases[2*n+1]/l.h; - } + for(n = 0; n < l.total; ++n){ + box pred = {0}; + pred.w = l.biases[2*n]/net.w; + pred.h = l.biases[2*n+1]/net.h; //printf("pred: (%f, %f) %f x %f\n", pred.x, pred.y, pred.w, pred.h); - pred.x = 0; - pred.y = 0; float iou = box_iou(pred, truth_shift); if (iou > best_iou){ best_iou = iou; @@ -293,37 +333,42 @@ void forward_region_layer(const layer l, network net) } //printf("%d %f (%f, %f) %f x %f\n", best_n, best_iou, truth.x, truth.y, truth.w, truth.h); - int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h); - if(l.coords > 4){ - int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4); - delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale); - } - if(iou > .5) recall += 1; - avg_iou += iou; + int mask_n = int_index(l.mask, best_n, l.n); + //printf("%d %d\n", best_n, mask_n); + if(mask_n >= 0){ + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, l.coord_scale*(2-truth.w*truth.h), l.w*l.h); + if(l.coords > 4){ + int mask_index = entry_index(l, b, 
mask_n*l.w*l.h + j*l.w + i, 4); + delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale); + } + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; - //l.delta[best_index + 4] = iou - l.output[best_index + 4]; - int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords); - avg_obj += l.output[obj_index]; - l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); - if (l.rescore) { - l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); - } - if(l.background){ - l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]); - } + //l.delta[best_index + 4] = iou - l.output[best_index + 4]; + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, l.coords); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); + if (l.rescore) { + l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); + } + if(l.background){ + l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]); + } - int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; - if (l.map) class = l.map[class]; - int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1); - delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); - ++count; - ++class_count; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, l.coords + 1); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax, l.focus); + ++count; + ++class_count; + } } } //printf("\n"); *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); - printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); } void backward_region_layer(const layer l, network net) @@ -339,7 +384,7 @@ void backward_region_layer(const layer l, network net) */ } -void correct_region_boxes(box *boxes, int n, int w, int h, int netw, int neth, int relative) +void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) { int i; int new_w=0; @@ -352,7 +397,7 @@ void correct_region_boxes(box *boxes, int n, int w, int h, int netw, int neth, i new_w = (w * neth)/h; } for (i = 0; i < n; ++i){ - box b = boxes[i]; + box b = dets[i].bbox; b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); b.w *= (float)netw/new_w; @@ -363,11 +408,11 @@ void correct_region_boxes(box *boxes, int n, int w, int h, int netw, int neth, i b.y *= h; b.h *= h; } - boxes[i] = b; + dets[i].bbox = b; } } -void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, box *boxes, float **masks, int only_objectness, int *map, float tree_thresh, int relative) +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) { int i,j,n,z; float *predictions = l.output; @@ -399,17 
+444,19 @@ void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, f int col = i % l.w; for(n = 0; n < l.n; ++n){ int index = n*l.w*l.h + i; - for(j = 0; j < l.classes; ++j){ - probs[index][j] = 0; + for (j = 0; j < l.classes; ++j) { + dets[index].prob[j] = 0; } int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); float scale = l.background ? 1 : predictions[obj_index]; - boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); - if(masks){ + dets[index].bbox = get_region_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[index].objectness = scale > thresh ? scale : 0; + dets[index].classes = l.classes; + if(dets[index].mask){ for(j = 0; j < l.coords - 4; ++j){ - masks[index][j] = l.output[mask_index + j*l.w*l.h]; + dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; } } @@ -421,39 +468,24 @@ void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, f for(j = 0; j < 200; ++j){ int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); float prob = scale*predictions[class_index]; - probs[index][j] = (prob > thresh) ? prob : 0; + dets[index].prob[j] = (prob > thresh) ? prob : 0; } } else { int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); - probs[index][j] = (scale > thresh) ? scale : 0; - probs[index][l.classes] = scale; + dets[index].prob[j] = (scale > thresh) ? scale : 0; } } else { - float max = 0; - for(j = 0; j < l.classes; ++j){ - int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); - float prob = scale*predictions[class_index]; - probs[index][j] = (prob > thresh) ? prob : 0; - if(prob > max) max = prob; - // TODO REMOVE - // if (j == 56 ) probs[index][j] = 0; - /* - if (j != 0) probs[index][j] = 0; - int blacklist[] = {121, 497, 482, 504, 122, 518,481, 418, 542, 491, 914, 478, 120, 510,500}; - int bb; - for (bb = 0; bb < sizeof(blacklist)/sizeof(int); ++bb){ - if(index == blacklist[bb]) probs[index][j] = 0; - } - */ + if(dets[index].objectness){ + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } } - probs[index][l.classes] = max; - } - if(only_objectness){ - probs[index][0] = scale; } } } - correct_region_boxes(boxes, l.w*l.h*l.n, w, h, netw, neth, relative); + correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); } #ifdef GPU @@ -479,17 +511,17 @@ void forward_region_layer_gpu(const layer l, network net) if (l.softmax_tree){ int index = entry_index(l, 0, 0, l.coords + 1); softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree); - /* - int mmin = 9000; - int mmax = 0; - int i; - for(i = 0; i < l.softmax_tree->groups; ++i){ - int group_size = l.softmax_tree->group_size[i]; - if (group_size < mmin) mmin = group_size; - if (group_size > mmax) mmax = group_size; - } + /* + int mmin = 9000; + int mmax = 0; + int i; + for(i = 0; i < l.softmax_tree->groups; ++i){ + int group_size = l.softmax_tree->group_size[i]; + if (group_size < mmin) mmin = group_size; + if (group_size > mmax) mmax = group_size; + } //printf("%d %d %d \n", l.softmax_tree->groups, mmin, mmax); - */ + */ /* // TIMING CODE int zz; @@ -573,13 +605,13 @@ void backward_region_layer_gpu(const layer l, network net) for (b = 0; b < l.batch; ++b){ for(n = 0; n < l.n; ++n){ int index = entry_index(l, b, n*l.w*l.h, 0); - gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); + //gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); if(l.coords > 4){ index = entry_index(l, b, n*l.w*l.h, 4); gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index); } index = entry_index(l, b, n*l.w*l.h, l.coords); - if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); + //if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); } } axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); diff --git a/src/region_layer.h b/src/region_layer.h index f67f9016..5aafe249 100644 --- a/src/region_layer.h +++ b/src/region_layer.h @@ -5,7 +5,7 @@ #include "layer.h" #include "network.h" -layer make_region_layer(int batch, int h, int w, int n, int classes, int coords); +layer make_region_layer(int batch, int h, int w, int n, int total, int *mask, int classes, int coords); void forward_region_layer(const layer l, network net); void backward_region_layer(const layer l, network net); void resize_region_layer(layer *l, int w, int h); diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index 0818ca7e..49d17f56 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -8,7 +8,7 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) { - fprintf(stderr,"Shortcut Layer: %d\n", index); + fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); layer l = {0}; l.type = SHORTCUT; l.batch = batch; @@ -38,32 +38,53 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int return l; } +void resize_shortcut_layer(layer *l, int w, int h) +{ + assert(l->w == l->out_w); + assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = 
cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + + void forward_shortcut_layer(const layer l, network net) { copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); - shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output); + shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); activate_array(l.output, l.outputs*l.batch, l.activation); } void backward_shortcut_layer(const layer l, network net) { gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); - axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, net.delta, 1); - shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, net.layers[l.index].delta); + axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); + shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); } #ifdef GPU void forward_shortcut_layer_gpu(const layer l, network net) { copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); - shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); + shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); } void backward_shortcut_layer_gpu(const layer l, network net) { gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); - axpy_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1, net.delta_gpu, 1); - shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, net.layers[l.index].delta_gpu); + axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); + shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta_gpu); } #endif diff --git a/src/shortcut_layer.h b/src/shortcut_layer.h index 32e4ebdc..5f684fc1 100644 --- a/src/shortcut_layer.h +++ b/src/shortcut_layer.h @@ -7,6 +7,7 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); void forward_shortcut_layer(const layer l, network net); void backward_shortcut_layer(const layer l, network net); +void resize_shortcut_layer(layer *l, int w, int h); #ifdef GPU void forward_shortcut_layer_gpu(const layer l, network net); diff --git a/src/softmax_layer.c b/src/softmax_layer.c index 372b037c..afcc6342 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -18,8 +18,10 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups) l.groups = groups; l.inputs = inputs; l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); l.output = calloc(inputs*batch, sizeof(float)); l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); l.forward = forward_softmax_layer; l.backward = backward_softmax_layer; @@ -28,6 +30,7 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups) l.backward_gpu = backward_softmax_layer_gpu; l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); l.delta_gpu = cuda_make_array(l.delta, inputs*batch); #endif return l; @@ -46,6 +49,11 @@ void forward_softmax_layer(const softmax_layer l, network net) } else { softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); } + + if(net.truth){ + softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, 
l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } } void backward_softmax_layer(const softmax_layer l, network net) @@ -63,6 +71,8 @@ void pull_softmax_layer_output(const softmax_layer layer) void forward_softmax_layer_gpu(const softmax_layer l, network net) { if(l.softmax_tree){ + softmax_tree(net.input_gpu, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); + /* int i; int count = 0; for (i = 0; i < l.softmax_tree->groups; ++i) { @@ -70,6 +80,7 @@ void forward_softmax_layer_gpu(const softmax_layer l, network net) softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); count += group_size; } + */ } else { if(l.spatial){ softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); @@ -77,6 +88,15 @@ void forward_softmax_layer_gpu(const softmax_layer l, network net) softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); } } + if(net.truth){ + softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + if(l.softmax_tree){ + mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + mask_gpu(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth_gpu, 0); + } + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } } void backward_softmax_layer_gpu(const softmax_layer layer, network net) diff --git a/src/tree.h b/src/tree.h index 100858a3..3802b8ea 100644 --- a/src/tree.h +++ b/src/tree.h @@ -2,7 +2,6 @@ #define TREE_H #include "darknet.h" -tree *read_tree(char *filename); int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); float get_hierarchy_probability(float *x, tree *hier, int c, int stride); diff --git a/src/upsample_layer.c b/src/upsample_layer.c new file mode 100644 index 00000000..605f21f8 --- /dev/null +++ b/src/upsample_layer.c @@ -0,0 +1,106 @@ +#include "upsample_layer.h" +#include "cuda.h" +#include "blas.h" + +#include + +layer make_upsample_layer(int batch, int w, int h, int c, int stride) +{ + layer l = {0}; + l.type = UPSAMPLE; + l.batch = batch; + l.w = w; + l.h = h; + l.c = c; + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c; + if(stride < 0){ + stride = -stride; + l.reverse=1; + l.out_w = w/stride; + l.out_h = h/stride; + } + l.stride = stride; + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = l.w*l.h*l.c; + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_upsample_layer; + l.backward = backward_upsample_layer; + #ifdef GPU + l.forward_gpu = forward_upsample_layer_gpu; + l.backward_gpu = backward_upsample_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_upsample_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->out_w = w*l->stride; + l->out_h = h*l->stride; + if(l->reverse){ + l->out_w = w/l->stride; + l->out_h = h/l->stride; + } + l->outputs = l->out_w*l->out_h*l->out_c; + l->inputs = l->h*l->w*l->c; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = 
realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_upsample_layer(const layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + if(l.reverse){ + upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); + }else{ + upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); + } +} + +void backward_upsample_layer(const layer l, network net) +{ + if(l.reverse){ + upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta); + }else{ + upsample_cpu(net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); + } +} + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.reverse){ + upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input_gpu); + }else{ + upsample_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); + } +} + +void backward_upsample_layer_gpu(const layer l, network net) +{ + if(l.reverse){ + upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta_gpu); + }else{ + upsample_gpu(net.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta_gpu); + } +} +#endif diff --git a/src/upsample_layer.h b/src/upsample_layer.h new file mode 100644 index 00000000..86790d10 --- /dev/null +++ b/src/upsample_layer.h @@ -0,0 +1,15 @@ +#ifndef UPSAMPLE_LAYER_H +#define UPSAMPLE_LAYER_H +#include "darknet.h" + +layer make_upsample_layer(int batch, int w, int h, int c, int stride); +void forward_upsample_layer(const layer l, network net); +void backward_upsample_layer(const layer l, network net); +void resize_upsample_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net); +void backward_upsample_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/src/utils.c b/src/utils.c index 9f1af1df..4e4efc2c 100644 --- a/src/utils.c +++ b/src/utils.c @@ -627,6 +627,15 @@ int max_index(float *a, int n) return max_i; } +int int_index(int *a, int val, int n) +{ + int i; + for(i = 0; i < n; ++i){ + if(a[i] == val) return i; + } + return -1; +} + int rand_int(int min, int max) { if (max < min){ diff --git a/src/utils.h b/src/utils.h index b0db7abf..ef24da79 100644 --- a/src/utils.h +++ b/src/utils.h @@ -37,11 +37,9 @@ list *parse_csv_line(char *line); char *copy_string(char *s); int count_fields(char *line); float *parse_fields(char *line, int n); -void scale_array(float *a, int n, float s); void translate_array(float *a, int n, float s); float constrain(float min, float max, float a); int constrain_int(int a, int min, int max); -float rand_uniform(float min, float max); float rand_scale(float s); int rand_int(int min, int max); void mean_arrays(float **a, int n, int els, float *avg); @@ -49,6 +47,7 @@ float dist_array(float *a, float *b, int n, int sub); float **one_hot_encode(float *a, int n, int k); float sec(clock_t clocks); void print_statistics(float *a, int n); +int int_index(int *a, int val, int n); #endif
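
The demo.c, network.c and image.c hunks above replace the old box*/float** probability buffers with heap-allocated detection arrays (num_boxes, make_network_boxes, fill_network_boxes, get_network_boxes, free_detections, and the reworked draw_detections/do_nms_obj calls). A minimal caller sketch, assuming these functions are exported through darknet.h (demo.c still re-declares a few of them locally), with the do_nms_obj prototype inferred from its call site and placeholder thresholds:

#include "darknet.h"

/* Hedged sketch of the detection API introduced by this patch; the
 * 0.24 / 0.5 / 0.4 thresholds are illustrative, not canonical. */
void detect_one_image(network *net, image im, char **names, image **alphabet)
{
    network_predict_image(net, im);              /* resizes to net->w x net->h internally and runs a forward pass */
    int nboxes = num_boxes(net);                 /* sums w*h*n over all REGION/DETECTION layers */
    detection *dets = get_network_boxes(net, im.w, im.h, .24, .5, 0, 1);

    layer l = net->layers[net->n - 1];
    do_nms_obj(dets, nboxes, l.classes, .4);     /* NMS now operates on detection structs */
    draw_detections(im, dets, nboxes, .24, names, alphabet, l.classes);
    free_detections(dets, nboxes);               /* also frees per-detection prob/mask arrays */
}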
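
get_region_box now receives both the layer grid size (lw, lh) and the network input size (w, h): the x/y offsets are still normalized by the grid, but the anchor priors stored in l.biases are interpreted in input pixels. That is why the anchors in tiny-yolo.cfg and yolo.cfg change from grid-cell units (roughly 0.5-10) to pixel units (roughly 18-313). For cell (i, j), raw prediction (t_x, t_y, t_w, t_h) and prior (p_w, p_h), the decoded box is

\[
b_x = \frac{i + t_x}{l_w}, \qquad
b_y = \frac{j + t_y}{l_h}, \qquad
b_w = \frac{p_w\,e^{t_w}}{w_{\text{net}}}, \qquad
b_h = \frac{p_h\,e^{t_h}}{h_{\text{net}}},
\]

and delta_region_box simply inverts these equations to form the regression targets.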
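
delta_region_class gains an optional focal-loss mode, enabled by the new focus= option read in parse_region. Writing p for the predicted probability of the ground-truth class and gamma for focus, the branch computes y * pow(1-p, focus) * (focus*p*log(p) + p - 1) with y = -1, which is the negative derivative of the focal loss with respect to the pre-sigmoid logit z (darknet's delta buffers hold negative gradients, e.g. 1 - p in the non-focal branch):

\[
FL(p) = -(1-p)^{\gamma}\log p, \qquad
\frac{\partial FL}{\partial z} = (1-p)^{\gamma}\bigl(\gamma\,p\log p + p - 1\bigr), \qquad p = \sigma(z),
\]

so the stored delta is \((1-p)^{\gamma}(1 - p - \gamma\,p\log p)\), which reduces to the usual \(1-p\) when \(\gamma = 0\). The remaining classes are handled symmetrically with y = +1 and p replaced by one minus their predicted probability, and log(p) is clamped to -10 for numerical safety.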
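
The new logistic and l2norm layers are thin wrappers: forward_logistic_layer applies a sigmoid and, when truth is present, accumulates the cross-entropy loss via logistic_x_ent_cpu, while forward_l2norm_layer calls l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h), which, judging by its name and arguments, normalizes the channel vector at every spatial position to unit length, i.e.

\[
x_{b,f,i} \leftarrow \frac{x_{b,f,i}}{\sqrt{\sum_{f'} x_{b,f',i}^{2} + \varepsilon}}
\]

(the stabilizing \(\varepsilon\) is an assumption; the implementation lives in blas.c and is not part of this diff).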
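
The new [upsample] layer delegates the actual work to upsample_cpu/upsample_gpu, which this patch does not show. From make_upsample_layer (out_w = w*stride, unchanged channel count, a reverse flag that turns the layer into a downsample, and a scale factor) the operation is presumably a nearest-neighbour copy; below is a sketch of that assumed behaviour, with the argument order mirrored from the call sites above (forward == 1 writes the enlarged tensor, forward == 0 accumulates it back into the small one, as the backward pass requires):

/* Assumed (not shown in this patch): nearest-neighbour upsampling over a
 * batch of CHW tensors. `in` is w x h x c, `out` is (w*stride) x (h*stride) x c. */
void upsample_cpu(float *in, int w, int h, int c, int batch, int stride,
                  int forward, float scale, float *out)
{
    int i, j, k, b;
    for (b = 0; b < batch; ++b) {
        for (k = 0; k < c; ++k) {
            for (j = 0; j < h * stride; ++j) {
                for (i = 0; i < w * stride; ++i) {
                    int in_index  = b*w*h*c + k*w*h + (j/stride)*w + i/stride;
                    int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride
                                    + j*w*stride + i;
                    if (forward) out[out_index]  = scale * in[in_index];   /* broadcast each pixel to a stride x stride block */
                    else         in[in_index]   += scale * out[out_index]; /* gather the block back (gradient accumulation) */
                }
            }
        }
    }
}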