CLEAN UP CLEAN UP EVERYBODY DO YOUR oh wait it's just me

2023-08-10 21:13:14 +03:00 · 2018-03-24 18:23:04 -07:00 · 2018-03-24 18:23:04 -07:00 · 777b098232
commit 777b098232
parent e31c50127e
16 changed files with 620 additions and 337 deletions
--- a/6
+++ b/6
@@ -1,6 +1,6 @@
 GPU=1
 CUDNN=1
-OPENCV=0
+OPENCV=1
 OPENMP=1
 DEBUG=0

@@ -26,7 +26,7 @@ ARFLAGS=rcs
 OPTS=-Ofast
 LDFLAGS= -lm -pthread 
 COMMON= -Iinclude/ -Isrc/
-CFLAGS=-Wall -Wno-unknown-pragmas -Wfatal-errors -fPIC
+CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC

 ifeq ($(OPENMP), 1) 
 CFLAGS+= -fopenmp
@@ -57,7 +57,7 @@ CFLAGS+= -DCUDNN
 LDFLAGS+= -lcudnn
 endif

-OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o  lstm_layer.o l2norm_layer.o
+OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o  lstm_layer.o l2norm_layer.o yolo_layer.o
 EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o darknet.o
 ifeq ($(GPU), 1) 
 LDFLAGS+= -lstdc++ 
--- a/examples/classifier.c
+++ b/examples/classifier.c
@@ -51,7 +51,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    if (tree) net->hierarchy = read_tree(tree);
    int classes = option_find_int(options, "classes", 2);

-    char **labels;
+    char **labels = 0;
    if(!tag){
        labels = get_labels(label_list);
    }
@@ -161,7 +161,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    pthread_join(load_thread, 0);

    free_network(net);
-    free_ptrs((void**)labels, classes);
+    if(labels) free_ptrs((void**)labels, classes);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
--- a/examples/coco.c
+++ b/examples/coco.c
@@ -146,8 +146,6 @@ void validate_coco(char *cfg, char *weights)
    FILE *fp = fopen(buff, "w");
    fprintf(fp, "[\n");

-    detection *dets = make_network_boxes(net, 0);
-
    int m = plist->size;
    int i=0;
    int t;
@@ -195,9 +193,11 @@ void validate_coco(char *cfg, char *weights)
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
-            fill_network_boxes(net, w, h, thresh, 0, 0, 0, dets);
+            int nboxes = 0;
+            detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes);
            if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh);
            print_cocos(fp, image_id, dets, l.side*l.side*l.n, classes, w, h);
+            free_detections(dets, nboxes);
            free_image(val[t]);
            free_image(val_resized[t]);
        }
@@ -231,7 +231,6 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
        snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]);
        fps[j] = fopen(buff, "w");
    }
-    detection *dets = make_network_boxes(net, 0);

    int m = plist->size;
    int i=0;
@@ -252,7 +251,8 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
        char *id = basecfg(path);
        network_predict(net, sized.data);

-        fill_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, dets);
+        int nboxes = 0;
+        detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes);
        if (nms) do_nms_obj(dets, side*side*l.n, 1, nms);

        char labelpath[4096];
@@ -283,7 +283,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
                ++correct;
            }
        }
-
+        free_detections(dets, nboxes);
        fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
        free(id);
        free_image(orig);
@@ -302,7 +302,6 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
    clock_t time;
    char buff[256];
    char *input = buff;
-    detection *dets = make_network_boxes(net, 0);
    while(1){
        if(filename){
            strncpy(input, filename, 256);
@@ -320,12 +319,14 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));

-        fill_network_boxes(net, 1, 1, thresh, 0, 0, 0, dets);
+        int nboxes = 0;
+        detection *dets = get_network_boxes(net, im.w, im.h, thresh, 0, 0, 0, &nboxes);
        if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms);

        draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80);
        save_image(im, "prediction");
        show_image(im, "predictions");
+        free_detections(dets, nboxes);
        free_image(im);
        free_image(sized);
 #ifdef OPENCV
--- a/examples/detector.c
+++ b/examples/detector.c
@@ -156,7 +156,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i

 static int get_coco_image_id(char *filename)
 {
-    char *p = strrchr(filename, '_');
+    char *p = strrchr(filename, '/');
+    char *c = strrchr(filename, '_');
+    if(c) p = c;
    return atoi(p+1);
 }

@@ -467,6 +469,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
            } else {
                print_detector_detections(fps, id, dets, nboxes, classes, w, h);
            }
+            free_detections(dets, nboxes);
            free(id);
            free_image(val[t]);
            free_image(val_resized[t]);
@@ -622,14 +625,13 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
    }
 }

+/*
 void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
 {
 #ifdef OPENCV
-    image **alphabet = load_alphabet();
    char *base = basecfg(cfgfile);
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);
-    list *options = read_data_cfg(datacfg);

    srand(2222222);
    CvCapture * cap;
@@ -650,20 +652,11 @@ void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
    }

-    int top = option_find_int(options, "top", 1);
-
-    char *label_list = option_find_str(options, "labels", 0);
-    char *name_list = option_find_str(options, "names", label_list);
-    char **names = get_labels(name_list);
-
-    int *indexes = calloc(top, sizeof(int));
-
    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow(base, CV_WINDOW_NORMAL); 
    cvResizeWindow(base, 512, 512);
    float fps = 0;
    int i;
-    int count = 0;
    float nms = .45;

    while(1){
@@ -709,11 +702,9 @@ void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
 void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
 {
 #ifdef OPENCV
-    image **alphabet = load_alphabet();
    char *base = basecfg(cfgfile);
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);
-    list *options = read_data_cfg(datacfg);

    srand(2222222);
    CvCapture * cap;
@@ -734,14 +725,6 @@ void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_in
        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
    }

-    int top = option_find_int(options, "top", 1);
-
-    char *label_list = option_find_str(options, "labels", 0);
-    char *name_list = option_find_str(options, "names", label_list);
-    char **names = get_labels(name_list);
-
-    int *indexes = calloc(top, sizeof(int));
-
    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow(base, CV_WINDOW_NORMAL); 
    cvResizeWindow(base, 512, 512);
@@ -795,6 +778,7 @@ void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_in
    }
    #endif
 }
+*/

 /*
 void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets)
@@ -848,15 +832,13 @@ void run_detector(int argc, char **argv)
    int width = find_int_arg(argc, argv, "-w", 0);
    int height = find_int_arg(argc, argv, "-h", 0);
    int fps = find_int_arg(argc, argv, "-fps", 0);
-    int class = find_int_arg(argc, argv, "-class", 0);
+    //int class = find_int_arg(argc, argv, "-class", 0);

    char *datacfg = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
-    else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
-    else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
    else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
@@ -868,4 +850,6 @@ void run_detector(int argc, char **argv)
        char **names = get_labels(name_list);
        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
    }
+    //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
+    //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
 }
--- a/examples/lsd.c
+++ b/examples/lsd.c
@@ -1,3 +1,4 @@
+#include <math.h>
 #include "darknet.h"

 /*
@@ -478,7 +479,7 @@ void test_dcgan(char *cfgfile, char *weightfile)
    clock_t time;
    char buff[256];
    char *input = buff;
-    int i, imlayer = 0;
+    int imlayer = 0;

    imlayer = net->n-1;

@@ -615,7 +616,7 @@ void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, i
            forward_network(anet);
            backward_network(anet);

-            float genaloss = *anet->cost / anet->batch;
+            //float genaloss = *anet->cost / anet->batch;

            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
            scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1);
@@ -785,7 +786,7 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
            forward_network(anet);
            backward_network(anet);

-            float genaloss = *anet->cost / anet->batch;
+            //float genaloss = *anet->cost / anet->batch;
            //printf("%f\n", genaloss);

            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
--- a/examples/rnn.c
+++ b/examples/rnn.c
@@ -100,8 +100,8 @@ float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, s
    float *y = calloc(batch * steps * characters, sizeof(float));
    for(i = 0; i < batch; ++i){
        int index = rand()%n;
-        int slen = strlen(source[index]);
-        int dlen = strlen(dest[index]);
+        //int slen = strlen(source[index]);
+        //int dlen = strlen(dest[index]);
        for(j = 0; j < steps; ++j){
            unsigned char curr = source[index][j];
            unsigned char next = dest[index][j];
--- a/examples/yolo.c
+++ b/examples/yolo.c
@@ -133,7 +133,6 @@ void validate_yolo(char *cfg, char *weights)
    image *buf = calloc(nthreads, sizeof(image));
    image *buf_resized = calloc(nthreads, sizeof(image));
    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
-    detection *dets = make_network_boxes(net, 0);

    load_args args = {0};
    args.w = net->w;
@@ -167,9 +166,11 @@ void validate_yolo(char *cfg, char *weights)
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
-            fill_network_boxes(net, w, h, thresh, 0, 0, 0, dets);
+            int nboxes = 0;
+            detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes);
            if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh);
            print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets);
+            free_detections(dets, nboxes);
            free(id);
            free_image(val[t]);
            free_image(val_resized[t]);
@@ -200,7 +201,6 @@ void validate_yolo_recall(char *cfg, char *weights)
        snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
        fps[j] = fopen(buff, "w");
    }
-    detection *dets = make_network_boxes(net, 0);

    int m = plist->size;
    int i=0;
@@ -221,7 +221,8 @@ void validate_yolo_recall(char *cfg, char *weights)
        char *id = basecfg(path);
        network_predict(net, sized.data);

-        fill_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, dets);
+        int nboxes = 0;
+        detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes);
        if (nms) do_nms_obj(dets, side*side*l.n, 1, nms);

        char labelpath[4096];
@@ -254,6 +255,7 @@ void validate_yolo_recall(char *cfg, char *weights)
        }

        fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
+        free_detections(dets, nboxes);
        free(id);
        free_image(orig);
        free_image(sized);
@@ -271,7 +273,6 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
    char buff[256];
    char *input = buff;
    float nms=.4;
-    detection *dets = make_network_boxes(net, 0);
    while(1){
        if(filename){
            strncpy(input, filename, 256);
@@ -289,13 +290,14 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));

-        fill_network_boxes(net, 1, 1, thresh, 0, 0, 0, dets);
+        int nboxes = 0;
+        detection *dets = get_network_boxes(net, im.w, im.h, thresh, 0, 0, 0, &nboxes);
        if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms);

        draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20);
        save_image(im, "predictions");
        show_image(im, "predictions");
-
+        free_detections(dets, nboxes);
        free_image(im);
        free_image(sized);
 #ifdef OPENCV
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -85,6 +85,7 @@ typedef enum {
    NETWORK,
    XNOR,
    REGION,
+    YOLO,
    REORG,
    UPSAMPLE,
    LOGXENT,
@@ -674,6 +675,7 @@ void get_detection_detections(layer l, int w, int h, float thresh, detection *de

 char *option_find_str(list *l, char *key, char *def);
 int option_find_int(list *l, char *key, int def);
+int option_find_int_quiet(list *l, char *key, int def);

 network *parse_network_cfg(char *filename);
 void save_weights(network *net, char *filename);
@@ -682,7 +684,8 @@ void save_weights_upto(network *net, char *filename, int cutoff);
 void load_weights_upto(network *net, char *filename, int start, int cutoff);

 void zero_objectness(layer l);
-int get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets);
+void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets);
+int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets);
 void free_network(network *net);
 void set_batch_network(network *net, int b);
 void set_temp_network(network *net, float t);
--- a/src/demo.c
+++ b/src/demo.c
@@ -50,7 +50,7 @@ void *detect_in_thread(void *ptr)
    if(l.type == DETECTION){
        get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
    } else */
-    detection *dets;
+    detection *dets = 0;
    int nboxes = 0;
    if (l.type == REGION){
        dets = get_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, &nboxes);
@@ -174,8 +174,6 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch

    if(!cap) error("Couldn't connect to webcam.\n");

-    int i;
-
    buff[0] = get_image_from_stream(cap);
    buff[1] = copy_image(buff[0]);
    buff[2] = copy_image(buff[0]);
--- a/src/network.c
+++ b/src/network.c
@@ -17,6 +17,7 @@
 #include "activation_layer.h"
 #include "detection_layer.h"
 #include "region_layer.h"
+#include "yolo_layer.h"
 #include "normalization_layer.h"
 #include "batchnorm_layer.h"
 #include "maxpool_layer.h"
@@ -151,6 +152,8 @@ char *get_layer_string(LAYER_TYPE a)
            return "detection";
        case REGION:
            return "region";
+        case YOLO:
+            return "yolo";
        case DROPOUT:
            return "dropout";
        case CROP:
@@ -376,6 +379,8 @@ int resize_network(network *net, int w, int h)
            resize_maxpool_layer(&l, w, h);
        }else if(l.type == REGION){
            resize_region_layer(&l, w, h);
+        }else if(l.type == YOLO){
+            resize_yolo_layer(&l, w, h);
        }else if(l.type == ROUTE){
            resize_route_layer(&l, net);
        }else if(l.type == SHORTCUT){
@@ -508,10 +513,10 @@ int num_detections(network *net, float thresh)
    int s = 0;
    for(i = 0; i < net->n; ++i){
        layer l = net->layers[i];
-        if(l.type == REGION){
-            s += region_num_detections(l, thresh);
+        if(l.type == YOLO){
+            s += yolo_num_detections(l, thresh);
        }
-        if(l.type == DETECTION){
+        if(l.type == DETECTION || l.type == REGION){
            s += l.w*l.h*l.n;
        }
    }
@@ -539,10 +544,14 @@ void fill_network_boxes(network *net, int w, int h, float thresh, float hier, in
    int j;
    for(j = 0; j < net->n; ++j){
        layer l = net->layers[j];
-        if(l.type == REGION){
-            int count = get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets);
+        if(l.type == YOLO){
+            int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets);
            dets += count;
        }
+        if(l.type == REGION){
+            get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets);
+            dets += l.w*l.h*l.n;
+        }
        if(l.type == DETECTION){
            get_detection_detections(l, w, h, thresh, dets);
            dets += l.w*l.h*l.n;
--- a/src/option_list.h
+++ b/src/option_list.h
@@ -12,7 +12,6 @@ typedef struct{
 int read_option(char *s, list *options);
 void option_insert(list *l, char *key, char *val);
 char *option_find(list *l, char *key);
-int option_find_int_quiet(list *l, char *key, int def);
 float option_find_float(list *l, char *key, float def);
 float option_find_float_quiet(list *l, char *key, float def);
 void option_unused(list *l);
--- a/src/parser.c
+++ b/src/parser.c
@@ -26,6 +26,7 @@
 #include "option_list.h"
 #include "parser.h"
 #include "region_layer.h"
+#include "yolo_layer.h"
 #include "reorg_layer.h"
 #include "rnn_layer.h"
 #include "route_layer.h"
@@ -50,6 +51,7 @@ LAYER_TYPE string_to_layer_type(char * type)
    if (strcmp(type, "[cost]")==0) return COST;
    if (strcmp(type, "[detection]")==0) return DETECTION;
    if (strcmp(type, "[region]")==0) return REGION;
+    if (strcmp(type, "[yolo]")==0) return YOLO;
    if (strcmp(type, "[local]")==0) return LOCAL;
    if (strcmp(type, "[conv]")==0
            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
@@ -277,14 +279,8 @@ softmax_layer parse_softmax(list *options, size_params params)
    return layer;
 }

-layer parse_region(list *options, size_params params)
+int *parse_yolo_mask(char *a, int *num)
 {
-    int coords = option_find_int(options, "coords", 4);
-    int classes = option_find_int(options, "classes", 20);
-    int total = option_find_int(options, "num", 1);
-    int num = total;
-
-    char *a = option_find_str(options, "mask", 0);
    int *mask = 0;
    if(a){
        int len = strlen(a);
@@ -299,36 +295,29 @@ layer parse_region(list *options, size_params params)
            mask[i] = val;
            a = strchr(a, ',')+1;
        }
-        num = n;
+        *num = n;
    }
-    layer l = make_region_layer(params.batch, params.w, params.h, num, total, mask, classes, coords);
+    return mask;
+}
+
+layer parse_yolo(list *options, size_params params)
+{
+    int classes = option_find_int(options, "classes", 20);
+    int total = option_find_int(options, "num", 1);
+    int num = total;
+
+    char *a = option_find_str(options, "mask", 0);
+    int *mask = parse_yolo_mask(a, &num);
+    layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes);
    assert(l.outputs == params.inputs);

-    l.log = option_find_int_quiet(options, "log", 0);
-    l.sqrt = option_find_int_quiet(options, "sqrt", 0);
-
-    l.softmax = option_find_int(options, "softmax", 0);
-    l.background = option_find_int_quiet(options, "background", 0);
    l.max_boxes = option_find_int_quiet(options, "max",90);
    l.jitter = option_find_float(options, "jitter", .2);
-    l.rescore = option_find_int_quiet(options, "rescore",0);

    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
-    l.classfix = option_find_int_quiet(options, "classfix", 0);
-    l.absolute = option_find_int_quiet(options, "absolute", 0);
    l.random = option_find_int_quiet(options, "random", 0);

-    l.coord_scale = option_find_float(options, "coord_scale", 1);
-    l.object_scale = option_find_float(options, "object_scale", 1);
-    l.noobject_scale = option_find_float(options, "noobject_scale", 1);
-    l.mask_scale = option_find_float_quiet(options, "mask_scale", 1);
-    l.class_scale = option_find_float(options, "class_scale", 1);
-    l.bias_match = option_find_int_quiet(options, "bias_match",0);
-    l.focus = option_find_float_quiet(options, "focus", 0);
-
-    char *tree_file = option_find_str(options, "tree", 0);
-    if (tree_file) l.softmax_tree = read_tree(tree_file);
    char *map_file = option_find_str(options, "map", 0);
    if (map_file) l.map = read_map(map_file);

@@ -348,6 +337,59 @@ layer parse_region(list *options, size_params params)
    }
    return l;
 }
+
+layer parse_region(list *options, size_params params)
+{
+    int coords = option_find_int(options, "coords", 4);
+    int classes = option_find_int(options, "classes", 20);
+    int num = option_find_int(options, "num", 1);
+
+    layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords);
+    assert(l.outputs == params.inputs);
+
+    l.log = option_find_int_quiet(options, "log", 0);
+    l.sqrt = option_find_int_quiet(options, "sqrt", 0);
+
+    l.softmax = option_find_int(options, "softmax", 0);
+    l.background = option_find_int_quiet(options, "background", 0);
+    l.max_boxes = option_find_int_quiet(options, "max",30);
+    l.jitter = option_find_float(options, "jitter", .2);
+    l.rescore = option_find_int_quiet(options, "rescore",0);
+
+    l.thresh = option_find_float(options, "thresh", .5);
+    l.classfix = option_find_int_quiet(options, "classfix", 0);
+    l.absolute = option_find_int_quiet(options, "absolute", 0);
+    l.random = option_find_int_quiet(options, "random", 0);
+
+    l.coord_scale = option_find_float(options, "coord_scale", 1);
+    l.object_scale = option_find_float(options, "object_scale", 1);
+    l.noobject_scale = option_find_float(options, "noobject_scale", 1);
+    l.mask_scale = option_find_float(options, "mask_scale", 1);
+    l.class_scale = option_find_float(options, "class_scale", 1);
+    l.bias_match = option_find_int_quiet(options, "bias_match",0);
+
+    char *tree_file = option_find_str(options, "tree", 0);
+    if (tree_file) l.softmax_tree = read_tree(tree_file);
+    char *map_file = option_find_str(options, "map", 0);
+    if (map_file) l.map = read_map(map_file);
+
+    char *a = option_find_str(options, "anchors", 0);
+    if(a){
+        int len = strlen(a);
+        int n = 1;
+        int i;
+        for(i = 0; i < len; ++i){
+            if (a[i] == ',') ++n;
+        }
+        for(i = 0; i < n; ++i){
+            float bias = atof(a);
+            l.biases[i] = bias;
+            a = strchr(a, ',')+1;
+        }
+    }
+    return l;
+}
+
 detection_layer parse_detection(list *options, size_params params)
 {
    int coords = option_find_int(options, "coords", 1);
@@ -747,6 +789,8 @@ network *parse_network_cfg(char *filename)
            l = parse_cost(options, params);
        }else if(lt == REGION){
            l = parse_region(options, params);
+        }else if(lt == YOLO){
+            l = parse_yolo(options, params);
        }else if(lt == DETECTION){
            l = parse_detection(options, params);
        }else if(lt == SOFTMAX){
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -10,14 +10,12 @@
 #include <string.h>
 #include <stdlib.h>

-layer make_region_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int coords)
+layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
 {
-    int i;
    layer l = {0};
    l.type = REGION;

    l.n = n;
-    l.total = total;
    l.batch = batch;
    l.h = h;
    l.w = w;
@@ -28,21 +26,15 @@ layer make_region_layer(int batch, int w, int h, int n, int total, int *mask, in
    l.classes = classes;
    l.coords = coords;
    l.cost = calloc(1, sizeof(float));
-    l.biases = calloc(total*2, sizeof(float));
-    if(mask) l.mask = mask;
-    else{
-        l.mask = calloc(n, sizeof(int));
-        for(i = 0; i < n; ++i){
-            l.mask[i] = i;
-        }
-    }
+    l.biases = calloc(n*2, sizeof(float));
    l.bias_updates = calloc(n*2, sizeof(float));
    l.outputs = h*w*n*(classes + coords + 1);
    l.inputs = l.outputs;
-    l.truths = 90*(l.coords + 1);
+    l.truths = 30*(l.coords + 1);
    l.delta = calloc(batch*l.outputs, sizeof(float));
    l.output = calloc(batch*l.outputs, sizeof(float));
-    for(i = 0; i < total*2; ++i){
+    int i;
+    for(i = 0; i < n*2; ++i){
        l.biases[i] = .5;
    }

@@ -81,37 +73,30 @@ void resize_region_layer(layer *l, int w, int h)
 #endif
 }

-box get_region_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
+box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
 {
    box b;
-    b.x = (i + x[index + 0*stride]) / lw;
-    b.y = (j + x[index + 1*stride]) / lh;
+    b.x = (i + x[index + 0*stride]) / w;
+    b.y = (j + x[index + 1*stride]) / h;
    b.w = exp(x[index + 2*stride]) * biases[2*n]   / w;
    b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h;
    return b;
 }

-float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride)
+float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride)
 {
-    box pred = get_region_box(x, biases, n, index, i, j, lw, lh, w, h, stride);
+    box pred = get_region_box(x, biases, n, index, i, j, w, h, stride);
    float iou = box_iou(pred, truth);

-    float tx = (truth.x*lw - i);
-    float ty = (truth.y*lh - j);
+    float tx = (truth.x*w - i);
+    float ty = (truth.y*h - j);
    float tw = log(truth.w*w / biases[2*n]);
    float th = log(truth.h*h / biases[2*n + 1]);

-    //printf("%f %f %f %f\n", tx, ty, tw, th);
-
    delta[index + 0*stride] = scale * (tx - x[index + 0*stride]);
    delta[index + 1*stride] = scale * (ty - x[index + 1*stride]);
    delta[index + 2*stride] = scale * (tw - x[index + 2*stride]);
    delta[index + 3*stride] = scale * (th - x[index + 3*stride]);
-    //printf("x: %f %f\n",tx , x[index + 0*stride]);
-    //printf("y: %f %f\n",ty , x[index + 1*stride]);
-    //printf("w: %f %f\n",tw , x[index + 2*stride]);
-    //printf("h: %f %f\n\n",th , x[index + 3*stride]);
-    //printf("%f %f %f %f\n", x[index + 0*stride], x[index + 1*stride], x[index + 2*stride], x[index + 3*stride]);
    return iou;
 }

@@ -124,7 +109,7 @@ void delta_region_mask(float *truth, float *x, int n, int index, float *delta, i
 }


-void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag, float focus)
+void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag)
 {
    int i, n;
    if(hier){
@@ -140,30 +125,15 @@ void delta_region_class(float *output, float *delta, int index, int class, int c

            class = hier->parent[class];
        }
-        if(avg_cat) *avg_cat += pred;
+        *avg_cat += pred;
    } else {
        if (delta[index] && tag){
-            if(focus){
-                float y = -1;
-                float p = output[index + stride*class];
-                float lg = p > .0000000001 ? log(p) : -10;
-                delta[index + stride*class] = y * pow(1-p, focus) * (focus*p*lg + p - 1);
-            }else{
            delta[index + stride*class] = scale * (1 - output[index + stride*class]);
-                if(avg_cat) *avg_cat += output[index + stride*class];
-            }
            return;
        }
        for(n = 0; n < classes; ++n){
-            if(focus){
-                float y = (n == class) ? -1 : 1;
-                float p = (n == class) ? output[index + stride*n] : 1 - output[index + stride*n];
-                float lg = p > .0000000001 ? log(p) : -10;
-                delta[index + stride*n] = y * pow(1-p, focus) * (focus*p*lg + p - 1);
-            }else{
            delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]);
-            }
-            if(n == class && avg_cat) *avg_cat += output[index + stride*n];
+            if(n == class) *avg_cat += output[index + stride*n];
        }
    }
 }
@@ -219,7 +189,6 @@ void forward_region_layer(const layer l, network net)
    if(!net.train) return;
    float avg_iou = 0;
    float recall = 0;
-    float recall75 = 0;
    float avg_cat = 0;
    float avg_obj = 0;
    float avg_anyobj = 0;
@@ -229,7 +198,7 @@ void forward_region_layer(const layer l, network net)
    for (b = 0; b < l.batch; ++b) {
        if(l.softmax_tree){
            int onlyclass = 0;
-            for(t = 0; t < l.max_boxes; ++t){
+            for(t = 0; t < 30; ++t){
                box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1);
                if(!truth.x) break;
                int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords];
@ -249,7 +218,7 @@ void forward_region_layer(const layer l, network net)
                    }
                    int class_index = entry_index(l, b, maxi, l.coords + 1);
                    int obj_index = entry_index(l, b, maxi, l.coords);
-                    delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax, l.focus);
+                    delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax);
                    if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]);
                    else  l.delta[obj_index] = 0;
                    l.delta[obj_index] = 0;
@ -264,50 +233,36 @@ void forward_region_layer(const layer l, network net)
            for (i = 0; i < l.w; ++i) {
                for (n = 0; n < l.n; ++n) {
                    int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
-                    box pred = get_region_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h);
+                    box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
                    float best_iou = 0;
-                    int best_t = 0;
-                    for(t = 0; t < l.max_boxes; ++t){
+                    for(t = 0; t < 30; ++t){
                        box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1);
                        if(!truth.x) break;
                        float iou = box_iou(pred, truth);
                        if (iou > best_iou) {
                            best_iou = iou;
-                            best_t = t;
                        }
                    }
                    int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords);
                    avg_anyobj += l.output[obj_index];
                    l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]);
                    if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]);
-                    if (best_iou > l.ignore_thresh) {
+                    if (best_iou > l.thresh) {
                        l.delta[obj_index] = 0;
                    }
-                    if (best_iou > l.truth_thresh) {
-                        l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]);

-                        int class = net.truth[best_t*(l.coords + 1) + b*l.truths + l.coords];
-                        if (l.map) class = l.map[class];
-                        int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords + 1);
-                        delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, 0, !l.softmax, l.focus);
-                        box truth = float_to_box(net.truth + best_t*(l.coords + 1) + b*l.truths, 1);
-                        delta_region_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, l.coord_scale*(2-truth.w*truth.h), l.w*l.h);
-                    }
-
-                    /*
                    if(*(net.seen) < 12800){
                        box truth = {0};
                        truth.x = (i + .5)/l.w;
                        truth.y = (j + .5)/l.h;
-                       truth.w = l.biases[2*l.mask[n]]/net.w;
-                       truth.h = l.biases[2*l.mask[n]+1]/net.h;
-                       delta_region_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, .01, l.w*l.h);
-                       }
-                     */
+                        truth.w = l.biases[2*n]/l.w;
+                        truth.h = l.biases[2*n+1]/l.h;
+                        delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h);
                    }
                }
            }
-        for(t = 0; t < l.max_boxes; ++t){
+        }
+        for(t = 0; t < 30; ++t){
            box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1);

            if(!truth.x) break;
@ -315,39 +270,35 @@ void forward_region_layer(const layer l, network net)
            int best_n = 0;
            i = (truth.x * l.w);
            j = (truth.y * l.h);
-            //printf("%d %f %d %f\n", i, truth.x*l.w, j, truth.y*l.h);
            box truth_shift = truth;
            truth_shift.x = 0;
            truth_shift.y = 0;
-            //printf("index %d %d\n",i, j);
-            for(n = 0; n < l.total; ++n){
-                box pred = {0};
-                pred.w = l.biases[2*n]/net.w;
-                pred.h = l.biases[2*n+1]/net.h;
-                //printf("pred: (%f, %f) %f x %f\n", pred.x, pred.y, pred.w, pred.h);
+            for(n = 0; n < l.n; ++n){
+                int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
+                box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
+                if(l.bias_match){
+                    pred.w = l.biases[2*n]/l.w;
+                    pred.h = l.biases[2*n+1]/l.h;
+                }
+                pred.x = 0;
+                pred.y = 0;
                float iou = box_iou(pred, truth_shift);
                if (iou > best_iou){
                    best_iou = iou;
                    best_n = n;
                }
            }
-            //printf("%d %f (%f, %f) %f x %f\n", best_n, best_iou, truth.x, truth.y, truth.w, truth.h);

-            int mask_n = int_index(l.mask, best_n, l.n);
-            //printf("%d %d\n", best_n, mask_n);
-            if(mask_n >= 0){
-                int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
-                float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, l.coord_scale*(2-truth.w*truth.h), l.w*l.h);
+            int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0);
+            float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale *  (2 - truth.w*truth.h), l.w*l.h);
            if(l.coords > 4){
-                    int mask_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4);
+                int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4);
                delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale);
            }
            if(iou > .5) recall += 1;
-                if(iou > .75) recall75 += 1;
            avg_iou += iou;

-                //l.delta[best_index + 4] = iou - l.output[best_index + 4];
-                int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, l.coords);
+            int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords);
            avg_obj += l.output[obj_index];
            l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]);
            if (l.rescore) {
@ -359,16 +310,14 @@ void forward_region_layer(const layer l, network net)

            int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords];
            if (l.map) class = l.map[class];
-                int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, l.coords + 1);
-                delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax, l.focus);
+            int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1);
+            delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax);
            ++count;
            ++class_count;
        }
    }
-    }
-    //printf("\n");
    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
-    printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f,  count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count);
+    printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f,  count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
 }

 void backward_region_layer(const layer l, network net)
@ -412,27 +361,11 @@ void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int ne
    }
 }

-int region_num_detections(layer l, float thresh)
-{
-    int i, n;
-    int count = 0;
-    for (i = 0; i < l.w*l.h; ++i){
-        int row = i / l.w;
-        int col = i % l.w;
-        for(n = 0; n < l.n; ++n){
-            int index = n*l.w*l.h + i;
-            int obj_index  = entry_index(l, 0, n*l.w*l.h + i, l.coords);
-            if(l.output[obj_index] > thresh){
-                ++count;
-            }
-        }
-    }
-    return count;
-}
-
-void avg_flipped_region(layer l)
+void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets)
 {
    int i,j,n,z;
+    float *predictions = l.output;
+    if (l.batch == 2) {
        float *flip = l.output + l.outputs;
        for (j = 0; j < l.h; ++j) {
            for (i = 0; i < l.w/2; ++i) {
@ -455,30 +388,20 @@ void avg_flipped_region(layer l)
            l.output[i] = (l.output[i] + flip[i])/2.;
        }
    }
-
-int get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets)
-{
-    int i,j,n,z;
-    float *predictions = l.output;
-    if (l.batch == 2) avg_flipped_region(l);
-    int count = 0;
    for (i = 0; i < l.w*l.h; ++i){
        int row = i / l.w;
        int col = i % l.w;
        for(n = 0; n < l.n; ++n){
-            int obj_index  = entry_index(l, 0, n*l.w*l.h + i, l.coords);
-            if(predictions[obj_index] <= thresh) continue;
-            int index = count;
-            ++count;
-            int box_index  = entry_index(l, 0, n*l.w*l.h + i, 0);
-            int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4);
+            int index = n*l.w*l.h + i;
            for(j = 0; j < l.classes; ++j){
                dets[index].prob[j] = 0;
            }
+            int obj_index  = entry_index(l, 0, n*l.w*l.h + i, l.coords);
+            int box_index  = entry_index(l, 0, n*l.w*l.h + i, 0);
+            int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4);
            float scale = l.background ? 1 : predictions[obj_index];
-            dets[index].bbox = get_region_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
+            dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h);
            dets[index].objectness = scale > thresh ? scale : 0;
-            dets[index].classes = l.classes;
            if(dets[index].mask){
                for(j = 0; j < l.coords - 4; ++j){
                    dets[index].mask[j] = l.output[mask_index + j*l.w*l.h];
@ -510,8 +433,7 @@ int get_region_detections(layer l, int w, int h, int netw, int neth, float thres
            }
        }
    }
-    correct_region_boxes(dets, count, w, h, netw, neth, relative);
-    return count;
+    correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative);
 }

 #ifdef GPU
@ -537,80 +459,8 @@ void forward_region_layer_gpu(const layer l, network net)
    if (l.softmax_tree){
        int index = entry_index(l, 0, 0, l.coords + 1);
        softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree);
-        /*
-           int mmin = 9000;
-           int mmax = 0;
-           int i;
-           for(i = 0; i < l.softmax_tree->groups; ++i){
-           int group_size = l.softmax_tree->group_size[i];
-           if (group_size < mmin) mmin = group_size;
-           if (group_size > mmax) mmax = group_size;
-           }
-        //printf("%d %d %d \n", l.softmax_tree->groups, mmin, mmax);
-         */
-        /*
-        // TIMING CODE
-        int zz;
-        int number = 1000;
-        int count = 0;
-        int i;
-        for (i = 0; i < l.softmax_tree->groups; ++i) {
-        int group_size = l.softmax_tree->group_size[i];
-        count += group_size;
-        }
-        printf("%d %d\n", l.softmax_tree->groups, count);
-        {
-        double then = what_time_is_it_now();
-        for(zz = 0; zz < number; ++zz){
-        int index = entry_index(l, 0, 0, 5);
-        softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree);
-        }
-        cudaDeviceSynchronize();
-        printf("Good GPU Timing: %f\n", what_time_is_it_now() - then);
-        } 
-        {
-        double then = what_time_is_it_now();
-        for(zz = 0; zz < number; ++zz){
-        int i;
-        int count = 5;
-        for (i = 0; i < l.softmax_tree->groups; ++i) {
-        int group_size = l.softmax_tree->group_size[i];
-        int index = entry_index(l, 0, 0, count);
-        softmax_gpu(net.input_gpu + index, group_size, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
-        count += group_size;
-        }
-        }
-        cudaDeviceSynchronize();
-        printf("Bad GPU Timing: %f\n", what_time_is_it_now() - then);
-        }
-        {
-        double then = what_time_is_it_now();
-        for(zz = 0; zz < number; ++zz){
-        int i;
-        int count = 5;
-        for (i = 0; i < l.softmax_tree->groups; ++i) {
-        int group_size = l.softmax_tree->group_size[i];
-        softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count);
-        count += group_size;
-        }
-        }
-        cudaDeviceSynchronize();
-        printf("CPU Timing: %f\n", what_time_is_it_now() - then);
-        }
-         */
-        /*
-           int i;
-           int count = 5;
-           for (i = 0; i < l.softmax_tree->groups; ++i) {
-           int group_size = l.softmax_tree->group_size[i];
-           int index = entry_index(l, 0, 0, count);
-           softmax_gpu(net.input_gpu + index, group_size, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
-           count += group_size;
-           }
-         */
    } else if (l.softmax) {
        int index = entry_index(l, 0, 0, l.coords + !l.background);
-        //printf("%d\n", index);
        softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
    }
    if(!net.train || l.onlyforward){
@ -631,13 +481,13 @@ void backward_region_layer_gpu(const layer l, network net)
    for (b = 0; b < l.batch; ++b){
        for(n = 0; n < l.n; ++n){
            int index = entry_index(l, b, n*l.w*l.h, 0);
-            //gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index);
+            gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index);
            if(l.coords > 4){
                index = entry_index(l, b, n*l.w*l.h, 4);
                gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index);
            }
            index = entry_index(l, b, n*l.w*l.h, l.coords);
-            //if(!l.background) gradient_array_gpu(l.output_gpu + index,   l.w*l.h, LOGISTIC, l.delta_gpu + index);
+            if(!l.background) gradient_array_gpu(l.output_gpu + index,   l.w*l.h, LOGISTIC, l.delta_gpu + index);
        }
    }
    axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1);
--- a/src/region_layer.h
+++ b/src/region_layer.h
@ -5,11 +5,10 @@
 #include "layer.h"
 #include "network.h"

-layer make_region_layer(int batch, int h, int w, int n, int total, int *mask, int classes, int coords);
+layer make_region_layer(int batch, int w, int h, int n, int classes, int coords);
 void forward_region_layer(const layer l, network net);
 void backward_region_layer(const layer l, network net);
 void resize_region_layer(layer *l, int w, int h);
-int region_num_detections(layer l, float thresh);

 #ifdef GPU
 void forward_region_layer_gpu(const layer l, network net);
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@ -0,0 +1,374 @@
+#include "yolo_layer.h"
+#include "activations.h"
+#include "blas.h"
+#include "box.h"
+#include "cuda.h"
+#include "utils.h"
+
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) /*
+ * Build and return a YOLO layer for a w x h grid.
+ *
+ *   batch   - number of images per batch
+ *   w, h    - grid width and height
+ *   n       - number of boxes this layer predicts per grid cell
+ *   total   - number of bias pairs allocated (two floats each)
+ *   mask    - optional array of n indices into the bias pairs; the pointer is
+ *             stored as-is, not copied. When NULL an identity mask 0..n-1 is
+ *             allocated instead.
+ *   classes - number of classes
+ *
+ * None of the calloc results are checked. l.max_boxes, l.ignore_thresh and
+ * l.truth_thresh are not assigned in this function and stay zero.
+ * NOTE(review): forward_yolo_layer reads those three fields, so presumably
+ * the caller fills them in after construction - confirm.
+ */
+{
+    int i;
+    layer l = {0};
+    l.type = YOLO;
+
+    l.n = n;
+    l.total = total;
+    l.batch = batch;
+    l.h = h;
+    l.w = w;
+    l.c = n*(classes + 4 + 1);  // per box: 4 box values + 1 objectness + one score per class
+    l.out_w = l.w;
+    l.out_h = l.h;
+    l.out_c = l.c;
+    l.classes = classes;
+    l.cost = calloc(1, sizeof(float));
+    l.biases = calloc(total*2, sizeof(float));
+    if(mask) l.mask = mask;  // caller's array is used directly
+    else{
+        l.mask = calloc(n, sizeof(int));
+        for(i = 0; i < n; ++i){
+            l.mask[i] = i;
+        }
+    }
+    l.bias_updates = calloc(n*2, sizeof(float));
+    l.outputs = h*w*n*(classes + 4 + 1);
+    l.inputs = l.outputs;  // the layer keeps the shape of its input
+    l.truths = 90*(4 + 1);  // per-image stride into net.truth: 90 slots of 4 + 1 floats
+    l.delta = calloc(batch*l.outputs, sizeof(float));
+    l.output = calloc(batch*l.outputs, sizeof(float));
+    for(i = 0; i < total*2; ++i){
+        l.biases[i] = .5;  // placeholder value for every bias entry
+    }
+
+    l.forward = forward_yolo_layer;
+    l.backward = backward_yolo_layer;
+#ifdef GPU
+    l.forward_gpu = forward_yolo_layer_gpu;
+    l.backward_gpu = backward_yolo_layer_gpu;
+    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
+    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
+#endif
+
+    fprintf(stderr, "detection\n");
+    srand(0);  // side effect: reseeds the C library RNG with a fixed seed
+
+    return l;
+}
+
+/*
+ * Resize the layer to a new w x h grid.
+ *
+ * Recomputes outputs/inputs from the new grid size and reallocates the host
+ * output and delta buffers (and, with GPU, the device copies) to
+ * batch*outputs floats. Existing buffer contents are not meaningful after
+ * the call.
+ *
+ * If a non-empty reallocation fails the process prints a message and exits,
+ * instead of overwriting the only pointer to the old buffer with NULL and
+ * crashing later on first use.
+ */
+void resize_yolo_layer(layer *l, int w, int h)
+{
+    l->w = w;
+    l->h = h;
+
+    l->outputs = h*w*l->n*(l->classes + 4 + 1);
+    l->inputs = l->outputs;
+
+    size_t bytes = l->batch*l->outputs*sizeof(float);
+
+    /* Stage the results so the old pointers survive a failed realloc. */
+    float *output = realloc(l->output, bytes);
+    float *delta = realloc(l->delta, bytes);
+    /* A NULL result is only an error when memory was actually requested. */
+    if(bytes && (!output || !delta)){
+        fprintf(stderr, "resize_yolo_layer: out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    l->output = output;
+    l->delta = delta;
+
+#ifdef GPU
+    cuda_free(l->delta_gpu);
+    cuda_free(l->output_gpu);
+
+    l->delta_gpu =     cuda_make_array(l->delta, l->batch*l->outputs);
+    l->output_gpu =    cuda_make_array(l->output, l->batch*l->outputs);
+#endif
+}
+
+/*
+ * Decode one predicted box from the raw output planes.
+ *
+ * x + index is the first of four planes spaced `stride` floats apart.
+ * The first two planes are offsets added to the cell position (i, j) and
+ * divided by the grid size (lw, lh). The last two are exponentiated, scaled
+ * by bias pair n and divided by w and h.
+ */
+box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
+{
+    const float *plane = x + index;
+    const float *prior = biases + 2*n;
+    box b;
+    b.x = (i + plane[0]) / lw;
+    b.y = (j + plane[stride]) / lh;
+    b.w = exp(plane[2*stride]) * prior[0] / w;
+    b.h = exp(plane[3*stride]) * prior[1] / h;
+    return b;
+}
+
+/*
+ * Write the box gradient for one predicted box and return its IoU with truth.
+ *
+ * The truth box is converted to the same encoding the network emits (cell
+ * offsets for x/y, log of the size relative to bias pair n for w/h). For
+ * each of the four planes, scale * (target - prediction) is stored in delta
+ * at the same offset the prediction was read from.
+ */
+float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride)
+{
+    int k;
+    float target[4];
+    box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride);
+    float iou = box_iou(pred, truth);
+
+    target[0] = (truth.x*lw - i);
+    target[1] = (truth.y*lh - j);
+    target[2] = log(truth.w*w / biases[2*n]);
+    target[3] = log(truth.h*h / biases[2*n + 1]);
+
+    for(k = 0; k < 4; ++k){
+        int at = index + k*stride;
+        delta[at] = scale * (target[k] - x[at]);
+    }
+    return iou;
+}
+
+
+/*
+ * Write the class gradient for one box.
+ *
+ * Class scores live at output[index + stride*k] for k in [0, classes).
+ * If delta[index] is already non-zero, only the entry for `class` is
+ * rewritten (to 1 - output). Otherwise every class entry is set to
+ * (k == class ? 1 : 0) - output. When avg_cat is non-NULL the output score
+ * of `class` is added to it.
+ */
+void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat)
+{
+    int k;
+    if (!delta[index]){
+        for(k = 0; k < classes; ++k){
+            int at = index + stride*k;
+            float target = (k == class) ? 1 : 0;
+            delta[at] = target - output[at];
+            if(avg_cat && k == class) *avg_cat += output[at];
+        }
+        return;
+    }
+
+    /* A gradient was already written here: touch only the true class. */
+    int hit = index + stride*class;
+    delta[hit] = 1 - output[hit];
+    if(avg_cat) *avg_cat += output[hit];
+}
+
+/*
+ * Flat offset into l.output / l.delta for one value.
+ *
+ * `location` packs a box number and a grid cell as box*(w*h) + cell.
+ * Each box owns 4 + classes + 1 consecutive planes of w*h floats, and
+ * `entry` selects the plane within that box.
+ */
+static int entry_index(layer l, int batch, int location, int entry)
+{
+    int area = l.w*l.h;
+    int anchor = location / area;
+    int cell = location % area;
+    int planes_per_box = 4 + l.classes + 1;
+    return batch*l.outputs + anchor*area*planes_per_box + entry*area + cell;
+}
+
+void forward_yolo_layer(const layer l, network net) /*
+ * Forward pass of the YOLO layer; when net.train is set it also fills
+ * l.delta and l.cost.
+ *
+ * Steps:
+ *   1. Copy net.input into l.output. In builds without GPU, apply the
+ *      LOGISTIC activation to planes 0-1 and to planes 4..4+classes of
+ *      every box; planes 2-3 are left as they are.
+ *   2. Zero l.delta and return early unless net.train is set.
+ *   3. For every predicted box, find its best IoU over the truth boxes and
+ *      set the objectness gradient from that IoU.
+ *   4. For every truth box, pick the bias pair whose shape overlaps it most
+ *      and, if this layer's mask contains that pair, write box, objectness
+ *      and class gradients at the truth's grid cell.
+ *   5. Store the cost and print running statistics.
+ */
+{
+    int i,j,b,t,n;
+    memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
+
+#ifndef GPU
+    for (b = 0; b < l.batch; ++b){  // CPU-only build: activate here (the GPU forward does its own)
+        for(n = 0; n < l.n; ++n){
+            int index = entry_index(l, b, n*l.w*l.h, 0);
+            activate_array(l.output + index, 2*l.w*l.h, LOGISTIC);  // planes 0 and 1
+            index = entry_index(l, b, n*l.w*l.h, 4);
+            activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC);  // objectness + class planes
+        }
+    }
+#endif
+
+    memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
+    if(!net.train) return;
+    float avg_iou = 0;
+    float recall = 0;
+    float recall75 = 0;
+    float avg_cat = 0;
+    float avg_obj = 0;
+    float avg_anyobj = 0;
+    int count = 0;
+    int class_count = 0;
+    *(l.cost) = 0;
+    for (b = 0; b < l.batch; ++b) {
+        for (j = 0; j < l.h; ++j) {
+            for (i = 0; i < l.w; ++i) {
+                for (n = 0; n < l.n; ++n) {
+                    int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
+                    box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h);
+                    float best_iou = 0;
+                    int best_t = 0;
+                    for(t = 0; t < l.max_boxes; ++t){
+                        box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1);
+                        if(!truth.x) break;  // a truth slot with x == 0 ends the list
+                        float iou = box_iou(pred, truth);
+                        if (iou > best_iou) {
+                            best_iou = iou;
+                            best_t = t;
+                        }
+                    }
+                    int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4);
+                    avg_anyobj += l.output[obj_index];
+                    l.delta[obj_index] = 0 - l.output[obj_index];  // default: push objectness toward 0
+                    if (best_iou > l.ignore_thresh) {
+                        l.delta[obj_index] = 0;  // overlaps a truth enough: no objectness gradient
+                    }
+                    if (best_iou > l.truth_thresh) {
+                        l.delta[obj_index] = 1 - l.output[obj_index];  // treat this box as a positive for truth best_t
+
+                        int class = net.truth[best_t*(4 + 1) + b*l.truths + 4];  // fifth float of a truth slot is the class id
+                        if (l.map) class = l.map[class];
+                        int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1);
+                        delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0);
+                        box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1);
+                        delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
+                    }
+                }
+            }
+        }
+        for(t = 0; t < l.max_boxes; ++t){
+            box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1);
+
+            if(!truth.x) break;
+            float best_iou = 0;
+            int best_n = 0;
+            i = (truth.x * l.w);  // grid column containing the truth centre
+            j = (truth.y * l.h);  // NOTE(review): assumes truth.x and truth.y are < 1, otherwise i/j fall outside the grid - confirm
+            box truth_shift = truth;
+            truth_shift.x = truth_shift.y = 0;  // compare shapes only: both boxes centred at the origin
+            for(n = 0; n < l.total; ++n){
+                box pred = {0};
+                pred.w = l.biases[2*n]/net.w;
+                pred.h = l.biases[2*n+1]/net.h;
+                float iou = box_iou(pred, truth_shift);
+                if (iou > best_iou){
+                    best_iou = iou;
+                    best_n = n;
+                }
+            }
+
+            int mask_n = int_index(l.mask, best_n, l.n);  // presumably the position of best_n within l.mask, negative if absent
+            if(mask_n >= 0){
+                int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
+                float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
+
+                int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4);
+                avg_obj += l.output[obj_index];
+                l.delta[obj_index] = 1 - l.output[obj_index];
+
+                int class = net.truth[t*(4 + 1) + b*l.truths + 4];
+                if (l.map) class = l.map[class];
+                int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1);
+                delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat);
+
+                ++count;
+                ++class_count;
+                if(iou > .5) recall += 1;
+                if(iou > .75) recall75 += 1;
+                avg_iou += iou;
+            }
+        }
+    }
+    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);  // presumably the squared L2 norm of delta
+    printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f,  count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count);  // count can still be 0 here, so the ratios may not be finite
+}
+
+/*
+ * Backward pass: hand this layer's gradient to the previous layer by
+ * combining l.delta into net.delta through axpy_cpu with a factor of 1.
+ */
+void backward_yolo_layer(const layer l, network net)
+{
+    const int count = l.batch*l.inputs;
+    axpy_cpu(count, 1, l.delta, 1, net.delta, 1);
+}
+
+/*
+ * Map n detection boxes from network-input coordinates back to the
+ * original w x h image.
+ *
+ * The image is taken to occupy a new_w x new_h area centred inside the
+ * netw x neth network input, with its aspect ratio preserved. Each box has
+ * that offset removed and is rescaled to the image; when `relative` is zero
+ * the result is additionally multiplied out to pixels.
+ */
+void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative)
+{
+    int k;
+    /* True when the image fills the network width and is padded vertically. */
+    const int fills_width = ((float)netw/w) < ((float)neth/h);
+    const int new_w = fills_width ? netw : (w * neth)/h;
+    const int new_h = fills_width ? (h * netw)/w : neth;
+
+    for (k = 0; k < n; ++k){
+        box *b = &dets[k].bbox;
+        b->x =  (b->x - (netw - new_w)/2./netw) / ((float)new_w/netw);
+        b->y =  (b->y - (neth - new_h)/2./neth) / ((float)new_h/neth);
+        b->w *= (float)netw/new_w;
+        b->h *= (float)neth/new_h;
+        if(relative) continue;
+        b->x *= w;
+        b->w *= w;
+        b->y *= h;
+        b->h *= h;
+    }
+}
+
+/*
+ * Count the boxes of the first batch item whose objectness value
+ * (plane 4) is strictly greater than thresh.
+ */
+int yolo_num_detections(layer l, float thresh)
+{
+    const int area = l.w*l.h;
+    int anchor, cell;
+    int hits = 0;
+    for (anchor = 0; anchor < l.n; ++anchor){
+        for (cell = 0; cell < area; ++cell){
+            int obj_index = entry_index(l, 0, anchor*area + cell, 4);
+            if (l.output[obj_index] > thresh) ++hits;
+        }
+    }
+    return hits;
+}
+
+void avg_flipped_yolo(layer l) /*
+ * Average the outputs of batch item 0 with a horizontally mirrored copy of
+ * batch item 1, writing the result into batch item 0.
+ *
+ * Batch item 1 (starting at l.output + l.outputs) is mirrored in place:
+ * columns i and l.w - i - 1 are swapped in every plane. For an odd l.w the
+ * middle column is not visited at all.
+ *
+ * NOTE(review): planes are indexed here as z*l.n + n, whereas entry_index
+ * lays them out as n*(4 + classes + 1) + entry. Both cover the same set of
+ * planes, so every plane is mirrored, but the z == 0 test selects the first
+ * l.n planes rather than each box's entry-0 plane - confirm which planes
+ * are meant to be negated.
+ */
+{
+    int i,j,n,z;
+    float *flip = l.output + l.outputs;  // second batch item
+    for (j = 0; j < l.h; ++j) {
+        for (i = 0; i < l.w/2; ++i) {
+            for (n = 0; n < l.n; ++n) {
+                for(z = 0; z < l.classes + 4 + 1; ++z){
+                    int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i;
+                    int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1);  // mirrored column in the same row and plane
+                    float swap = flip[i1];
+                    flip[i1] = flip[i2];
+                    flip[i2] = swap;
+                    if(z == 0){
+                        flip[i1] = -flip[i1];  // sign of both swapped values is flipped for z == 0
+                        flip[i2] = -flip[i2];
+                    }
+                }
+            }
+        }
+    }
+    for(i = 0; i < l.outputs; ++i){
+        l.output[i] = (l.output[i] + flip[i])/2.;  // element-wise mean of the two batch items
+    }
+}
+
+/*
+ * Collect detections from the first batch item into dets and return how
+ * many were written.
+ *
+ * A box is emitted only when its objectness is strictly above thresh.
+ * Each class probability is objectness * class score, stored as 0 unless
+ * it exceeds thresh. Boxes are finally passed through correct_yolo_boxes.
+ * When l.batch == 2 the two batch items are first merged by
+ * avg_flipped_yolo. `map` is accepted but not used here. dets must have
+ * room for every emitted detection, including its prob array.
+ */
+int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets)
+{
+    const int area = l.w*l.h;
+    float *predictions = l.output;
+    int cell, anchor, k;
+    int count = 0;
+
+    if (l.batch == 2) avg_flipped_yolo(l);
+
+    for (cell = 0; cell < area; ++cell){
+        int row = cell / l.w;
+        int col = cell % l.w;
+        for (anchor = 0; anchor < l.n; ++anchor){
+            int location = anchor*area + cell;
+            float objectness = predictions[entry_index(l, 0, location, 4)];
+            if(objectness <= thresh) continue;
+
+            detection *det = &dets[count];
+            int box_index = entry_index(l, 0, location, 0);
+            det->bbox = get_yolo_box(predictions, l.biases, l.mask[anchor], box_index, col, row, l.w, l.h, netw, neth, area);
+            det->objectness = objectness;
+            det->classes = l.classes;
+            for (k = 0; k < l.classes; ++k){
+                float score = predictions[entry_index(l, 0, location, 4 + 1 + k)];
+                float prob = objectness*score;
+                det->prob[k] = (prob > thresh) ? prob : 0;
+            }
+            ++count;
+        }
+    }
+    correct_yolo_boxes(dets, count, w, h, netw, neth, relative);
+    return count;
+}
+
+#ifdef GPU
+
+void forward_yolo_layer_gpu(const layer l, network net) /*
+ * GPU forward pass.
+ *
+ * Copies the input into l.output_gpu and applies the LOGISTIC activation to
+ * planes 0-1 and planes 4..4+classes of every box. For inference, or when
+ * l.onlyforward is set, the activated output is fetched into l.output and
+ * the function returns. For training the gradient is computed by the CPU
+ * routine forward_yolo_layer and then sent to l.delta_gpu.
+ * NOTE(review): cuda_pull_array / cuda_push_array are presumably
+ * device-to-host and host-to-device copies - confirm.
+ */
+{
+    copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1);
+    int b, n;
+    for (b = 0; b < l.batch; ++b){
+        for(n = 0; n < l.n; ++n){
+            int index = entry_index(l, b, n*l.w*l.h, 0);
+            activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC);  // planes 0 and 1
+            index = entry_index(l, b, n*l.w*l.h, 4);
+            activate_array_gpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC);  // objectness + class planes
+        }
+    }
+    if(!net.train || l.onlyforward){
+        cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
+        return;
+    }
+
+    cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs);  // already-activated values land in net.input
+    forward_yolo_layer(l, net);  // copies net.input to l.output; its own activation is compiled out when GPU is defined
+    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
+}
+
+/*
+ * GPU backward pass: combine l.delta_gpu into net.delta_gpu through
+ * axpy_gpu with a factor of 1.
+ */
+void backward_yolo_layer_gpu(const layer l, network net)
+{
+    const int count = l.batch*l.inputs;
+    axpy_gpu(count, 1, l.delta_gpu, 1, net.delta_gpu, 1);
+}
+#endif
+
--- a/src/yolo_layer.h
+++ b/src/yolo_layer.h
@ -0,0 +1,19 @@
+#ifndef YOLO_LAYER_H
+#define YOLO_LAYER_H
+
+#include "darknet.h"
+#include "layer.h"
+#include "network.h"
+
+/* Public interface of the YOLO detection layer (see yolo_layer.c). */
+
+/* Build a YOLO layer for a w x h grid predicting n boxes per cell. `total`
+ * is the number of bias pairs; `mask` optionally selects n of them (the
+ * pointer is stored, not copied) and may be NULL. */
+layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes);
+
+/* Forward pass; fills l.delta and l.cost when net.train is set. */
+void forward_yolo_layer(const layer l, network net);
+
+/* Backward pass; passes l.delta on to net.delta. */
+void backward_yolo_layer(const layer l, network net);
+
+/* Reallocate the layer's buffers for a new w x h grid. */
+void resize_yolo_layer(layer *l, int w, int h);
+
+/* Number of boxes in the first batch item whose objectness exceeds thresh. */
+int yolo_num_detections(layer l, float thresh);
+
+#ifdef GPU
+void forward_yolo_layer_gpu(const layer l, network net);
+/* Declared with `const layer l` to match the definition and the
+ * prototypes above. */
+void backward_yolo_layer_gpu(const layer l, network net);
+#endif
+
+#endif