adding yolo9000

2023-08-10 21:13:14 +03:00 · 2017-01-04 04:44:00 -08:00 · 2017-01-04 04:44:00 -08:00 · d2dece3df7
commit d2dece3df7
parent 2710d63257
29 changed files with 19591 additions and 596 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,6 +3,8 @@
 *.csv
 *.out
 *.png
 *.jpg
 old/
 mnist/
 data/
 caffe/
--- a/cfg/coco.data
+++ b/cfg/coco.data
@ -1,7 +1,7 @@
 classes= 80
 train  = /home/pjreddie/data/coco/trainvalno5k.txt
-valid  = coco_testdev
+#valid  = coco_testdev
-#valid = data/coco_val_5k.list
+valid = data/coco_val_5k.list
 names = data/coco.names
 backup = /home/pjreddie/backup/
 eval=coco
--- a/cfg/voc.data
+++ b/cfg/voc.data
@ -2,5 +2,5 @@ classes= 20
 train  = /home/pjreddie/data/voc/train.txt
 valid  = /home/pjreddie/data/voc/2007_test.txt
 names = data/voc.names
-backup = /home/pjreddie/backup/
+backup = backup
--- a/cfg/yolo9000.cfg
+++ b/cfg/yolo9000.cfg
@ -0,0 +1,211 @@
 [net]
 batch=1
 subdivisions=1
 height=416
 width=416
 channels=3
 momentum=0.9
 decay=0.0005
 learning_rate=0.00001
 max_batches = 242200
 policy=steps
 steps=500,200000,240000
 scales=10,.1,.1
 hue=.1
 saturation=.75
 exposure=.75
 [convolutional]
 batch_normalize=1
 filters=32
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 filters=28269
 size=1
 stride=1
 pad=1
 activation=linear
 [region]
 anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974
 bias_match=1
 classes=9418
 coords=4
 num=3
 softmax=1
 jitter=.2
 rescore=1
 object_scale=5
 noobject_scale=1
 class_scale=1
 coord_scale=1
 thresh = .6
 absolute=1
 random=1
 tree=data/9k.tree
 map = data/coco9k.map
--- a/data/9k.labels
+++ b/data/9k.labels
--- a/data/9k.names
+++ b/data/9k.names
--- a/data/coco9k.map
+++ b/data/coco9k.map
@ -0,0 +1,80 @@
 5177
 3768
 3802
 3800
 4107
 4072
 4071
 3797
 4097
 2645
 5150
 2644
 3257
 2523
 6527
 6866
 6912
 7342
 7255
 7271
 7217
 6858
 7343
 7233
 3704
 4374
 3641
 5001
 3899
 2999
 2631
 5141
 2015
 1133
 1935
 1930
 5144
 5143
 2371
 3916
 3745
 3640
 4749
 4736
 4735
 3678
 58
 42
 771
 81
 152
 141
 786
 700
 218
 791
 2518
 2521
 3637
 2458
 2505
 2519
 3499
 2837
 3503
 2597
 3430
 2080
 5103
 5111
 5102
 3013
 5096
 1102
 3218
 4010
 2266
 1127
 5122
 2360
--- a/data/inet9k.map
+++ b/data/inet9k.map
@ -0,0 +1,200 @@
 2687
 4107
 8407
 7254
 42
 6797
 127
 2268
 2442
 3704
 260
 1970
 58
 4443
 2661
 2043
 2039
 4858
 4007
 6858
 8408
 166
 2523
 3768
 4347
 6527
 2446
 5005
 3274
 3678
 4918
 709
 4072
 8428
 7223
 2251
 3802
 3848
 7271
 2677
 8267
 2849
 2518
 2738
 3746
 5105
 3430
 3503
 2249
 1841
 2032
 2358
 122
 3984
 4865
 3246
 5095
 6912
 6878
 8467
 2741
 1973
 3057
 7217
 1872
 44
 2452
 3637
 2704
 6917
 2715
 6734
 2325
 6864
 6677
 2035
 1949
 338
 2664
 5122
 1844
 784
 2223
 7188
 2719
 2670
 4830
 158
 4818
 7228
 1965
 7342
 786
 2095
 8281
 8258
 7406
 3915
 8382
 2437
 2837
 82
 6871
 1876
 7447
 8285
 5007
 2740
 3463
 5103
 3755
 4910
 6809
 3800
 118
 3396
 3092
 2709
 81
 7105
 4036
 2366
 1846
 5177
 2684
 64
 2041
 3919
 700
 3724
 1742
 39
 807
 7184
 2256
 235
 2778
 2996
 2030
 3714
 7167
 2369
 6705
 6861
 5096
 2597
 2166
 2036
 3228
 3747
 2711
 8300
 2226
 7153
 7255
 2631
 7109
 8242
 7445
 3776
 3803
 3690
 2025
 2521
 2316
 7190
 8249
 3352
 2639
 2887
 100
 4219
 3344
 5008
 7224
 3351
 2434
 2074
 2034
 8304
 5004
 6868
 5102
 2645
 4071
 2716
 2717
 7420
 3499
 3763
 5084
 2676
 2046
 5107
 5097
 3944
 4097
 7132
 3956
 7343
--- a/src/box.c
+++ b/src/box.c
@ -246,6 +246,34 @@ int nms_comparator(const void *pa, const void *pb)
    return 0;
 }
 /*
  * Non-maximum suppression driven by the score stored at probs[i][classes].
  *
  * boxes   - candidate boxes, `total` entries.
  * probs   - per-box score rows; each row is read and written at indices
  *           0..classes inclusive, so rows must hold classes+1 floats.
  * total   - number of candidate boxes.
  * classes - number of class scores; index `classes` holds the extra score
  *           used for ordering and for the "already suppressed" test.
  * thresh  - IoU above which a later box is suppressed.
  *
  * Candidates are ordered with nms_comparator (defined elsewhere; presumably
  * descending by the score at index `classes` - confirm).  Every box that
  * follows a surviving box in that order and overlaps it by more than
  * `thresh` has its whole score row zeroed.
  */
 void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh)
 {
    int rank, other, c;
    sortable_bbox *order = calloc(total, sizeof(sortable_bbox));

    for(rank = 0; rank < total; ++rank){
        sortable_bbox *entry = order + rank;
        entry->index = rank;
        entry->class = classes;
        entry->probs = probs;
    }

    qsort(order, total, sizeof(sortable_bbox), nms_comparator);

    for(rank = 0; rank < total; ++rank){
        int keeper = order[rank].index;
        /* A zeroed score means this box was suppressed earlier (or never fired). */
        if(probs[keeper][classes] == 0) continue;

        box kept = boxes[keeper];
        for(other = rank + 1; other < total; ++other){
            int loser = order[other].index;
            box candidate = boxes[loser];
            if(!(box_iou(kept, candidate) > thresh)) continue;

            /* Wipe every class score plus the trailing score at index `classes`. */
            for(c = 0; c < classes+1; ++c){
                probs[loser][c] = 0;
            }
        }
    }

    free(order);
 }
 void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh)
 {
    int i, j, k;
--- a/src/box.h
+++ b/src/box.h
@ -15,6 +15,7 @@ float box_rmse(box a, box b);
 dbox diou(box a, box b);
 void do_nms(box *boxes, float **probs, int total, int classes, float thresh);
 void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh);
 void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh);
 box decode_box(box b, box anchor);
 box encode_box(box b, box anchor);
--- a/src/coco.c
+++ b/src/coco.c
@ -384,5 +384,5 @@ void run_coco(int argc, char **argv)
    else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
    else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
    else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
-    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix);
+    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, .5);
 }
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@ -127,6 +127,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
 void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
 {
    //constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
    gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
--- a/src/cpu_gemm.c
+++ b/src/cpu_gemm.c
@ -1,91 +0,0 @@
 #include "mini_blas.h"
 /*
  * Accumulating kernel for the "no transpose / no transpose" case:
  *     C[i][j] += ALPHA * sum_k A[i][k] * B[k][j]
  * with A indexed as A[i*lda + k], B as B[k*ldb + j] and C as C[i*ldc + j].
  *
  * M, N, K are the loop extents for i, j and k.  TA, TB and BETA are accepted
  * for signature parity with the other kernels but are not read here; any
  * scaling of C by BETA must already have been applied by the caller.
  */
 void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA, 
        float *A, int lda, 
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
 {
    int row, inner, col;
    for(row = 0; row < M; ++row){
        const int c_base = row*ldc;
        for(inner = 0; inner < K; ++inner){
            /* Hoist the A element (pre-scaled by ALPHA) out of the column loop. */
            const float scaled_a = ALPHA*A[row*lda + inner];
            const int b_base = inner*ldb;
            for(col = 0; col < N; ++col){
                C[c_base + col] += scaled_a*B[b_base + col];
            }
        }
    }
 }
 /*
  * Accumulating kernel for the "A as-is / B transposed" case:
  *     C[i][j] += sum_k ALPHA * A[i][k] * B[j][k]
  * with A indexed as A[i*lda + k], B as B[j*ldb + k] and C as C[i*ldc + j].
  *
  * The dot product for each output element is gathered in a local
  * accumulator and added to C once.  TA, TB and BETA are not read here.
  */
 void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA, 
        float *A, int lda, 
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
 {
    int row, col, inner;
    for(row = 0; row < M; ++row){
        const int a_base = row*lda;
        for(col = 0; col < N; ++col){
            const int b_base = col*ldb;
            float dot = 0;
            for(inner = 0; inner < K; ++inner){
                dot += ALPHA*A[a_base + inner]*B[inner + b_base];
            }
            C[row*ldc + col] += dot;
        }
    }
 }
 /*
  * Accumulating kernel for the "A transposed / B as-is" case:
  *     C[i][j] += ALPHA * sum_k A[k][i] * B[k][j]
  * with A indexed as A[k*lda + i], B as B[k*ldb + j] and C as C[i*ldc + j].
  *
  * TA, TB and BETA are accepted for signature parity but are not read here.
  */
 void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA, 
        float *A, int lda, 
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
 {
    int row, inner, col;
    for(row = 0; row < M; ++row){
        const int c_base = row*ldc;
        for(inner = 0; inner < K; ++inner){
            /* A is walked down a column: element (inner, row). */
            const float scaled_a = ALPHA*A[inner*lda + row];
            const int b_base = inner*ldb;
            for(col = 0; col < N; ++col){
                C[c_base + col] += scaled_a*B[b_base + col];
            }
        }
    }
 }
 /*
  * Accumulating kernel for the "A transposed / B transposed" case:
  *     C[i][j] += ALPHA * A[k][i] * B[j][k]   for every k
  * with A indexed as A[i + k*lda], B as B[k + j*ldb] and C as C[i*ldc + j].
  *
  * Each product is added straight into C (no local accumulator).
  * TA, TB and BETA are accepted for signature parity but are not read here.
  */
 void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA, 
        float *A, int lda, 
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
 {
    int row, col, inner;
    for(row = 0; row < M; ++row){
        for(col = 0; col < N; ++col){
            const int out = row*ldc + col;
            const int b_base = col*ldb;
            for(inner = 0; inner < K; ++inner){
                C[out] += ALPHA*A[row + inner*lda]*B[inner + b_base];
            }
        }
    }
 }
 /*
  * CPU matrix-multiply entry point.
  *
  * First multiplies every element of the M x N block of C (row stride ldc)
  * by BETA, then hands off to one of the four accumulating kernels chosen
  * by the truthiness of TA and TB:
  *     TA == 0, TB == 0 -> cpu_gemm_nn
  *     TA != 0, TB == 0 -> cpu_gemm_tn
  *     TA == 0, TB != 0 -> cpu_gemm_nt
  *     TA != 0, TB != 0 -> cpu_gemm_tt
  * All arguments are forwarded to the selected kernel unchanged.
  */
 void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, 
        float *A, int lda, 
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
 {
    int row, col;
    for(row = 0; row < M; ++row){
        const int c_base = row*ldc;
        for(col = 0; col < N; ++col){
            C[c_base + col] *= BETA;
        }
    }

    if(TA){
        if(TB){
            cpu_gemm_tt(TA, TB, M, N, K, ALPHA, A, lda, B, ldb, BETA, C, ldc);
        } else {
            cpu_gemm_tn(TA, TB, M, N, K, ALPHA, A, lda, B, ldb, BETA, C, ldc);
        }
    } else {
        if(TB){
            cpu_gemm_nt(TA, TB, M, N, K, ALPHA, A, lda, B, ldb, BETA, C, ldc);
        } else {
            cpu_gemm_nn(TA, TB, M, N, K, ALPHA, A, lda, B, ldb, BETA, C, ldc);
        }
    }
 }
--- a/src/darknet.c
+++ b/src/darknet.c
@ -13,7 +13,7 @@
 #endif
 extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
-extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh);
+extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh);
 extern void run_voxel(int argc, char **argv);
 extern void run_yolo(int argc, char **argv);
 extern void run_detector(int argc, char **argv);
@ -129,7 +129,9 @@ void oneoff(char *cfgfile, char *weightfile, char *outfile)
    network net = parse_network_cfg(cfgfile);
    int oldn = net.layers[net.n - 2].n;
    int c = net.layers[net.n - 2].c;
-    net.layers[net.n - 2].n = 9372;
+    scal_cpu(oldn*c, .1, net.layers[net.n - 2].weights, 1);
    scal_cpu(oldn, 0, net.layers[net.n - 2].biases, 1);
    net.layers[net.n - 2].n = 9418;
    net.layers[net.n - 2].biases += 5;
    net.layers[net.n - 2].weights += 5*c;
    if(weightfile){
@ -383,7 +385,7 @@ int main(int argc, char **argv)
    } else if (0 == strcmp(argv[1], "detect")){
        float thresh = find_float_arg(argc, argv, "-thresh", .24);
        char *filename = (argc > 4) ? argv[4]: 0;
-        test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh);
+        test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5);
    } else if (0 == strcmp(argv[1], "cifar")){
        run_cifar(argc, argv);
    } else if (0 == strcmp(argv[1], "go")){
--- a/src/data.c
+++ b/src/data.c
@ -267,7 +267,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
        h =  boxes[i].h;
        id = boxes[i].id;
-        if (w < .01 || h < .01) continue;
+        if (w < .005 || h < .005) continue;
        int col = (int)(x*num_boxes);
        int row = (int)(y*num_boxes);
@ -317,7 +317,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
        h =  boxes[i].h;
        id = boxes[i].id;
-        if ((w < .01 || h < .01)) continue;
+        if ((w < .005 || h < .005)) continue;
        truth[i*5+0] = x;
        truth[i*5+1] = y;
--- a/src/demo.c
+++ b/src/demo.c
@ -31,6 +31,7 @@ static image disp = {0};
 static CvCapture * cap;
 static float fps = 0;
 static float demo_thresh = 0;
 static float demo_hier_thresh = .5;
 static float *predictions[FRAMES];
 static int demo_index = 0;
@ -63,7 +64,7 @@ void *detect_in_thread(void *ptr)
    if(l.type == DETECTION){
        get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
    } else if (l.type == REGION){
-        get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0);
+        get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0, demo_hier_thresh);
    } else {
        error("Last layer must produce detections\n");
    }
@ -91,7 +92,7 @@ double get_wall_time()
    return (double)time.tv_sec + (double)time.tv_usec * .000001;
 }
-void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix)
+void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh)
 {
    //skip = frame_skip;
    image **alphabet = load_alphabet();
@ -100,6 +101,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
    demo_alphabet = alphabet;
    demo_classes = classes;
    demo_thresh = thresh;
    demo_hier_thresh = hier_thresh;
    printf("Demo\n");
    net = parse_network_cfg(cfgfile);
    if(weightfile){
@ -127,7 +129,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
    boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box));
    probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *));
-    for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *));
+    for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float));
    pthread_t fetch_thread;
    pthread_t detect_thread;
@ -213,7 +215,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
    }
 }
 #else
-void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix)
+void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh)
 {
    /* Fallback definition from the #else branch of a preprocessor conditional
     * whose condition is outside this view (presumably the OpenCV build
     * flag - confirm).  Every argument is ignored; the only effect is the
     * message written to stderr below. */
    fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
 }
--- a/src/demo.h
+++ b/src/demo.h
@ -2,6 +2,6 @@
 #define DEMO
 #include "image.h"
-void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix);
+void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh);
 #endif
--- a/src/detector.c
+++ b/src/detector.c
@ -81,7 +81,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
        if(l.random && count++%10 == 0){
            printf("Resizing\n");
            int dim = (rand() % 10 + 10) * 32;
-            if (get_current_batch(net)+100 > net.max_batches) dim = 544;
+            if (get_current_batch(net)+200 > net.max_batches) dim = 608;
            //int dim = (rand() % 4 + 16) * 32;
            printf("%d\n", dim);
            args.w = dim;
@ -231,7 +231,7 @@ void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int
    }
 }
-void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
+void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
 {
    int j;
    list *options = read_data_cfg(datacfg);
@ -251,7 +251,6 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    srand(time(0));
    char *base = "comp4_det_test_";
    list *plist = get_paths(valid_images);
    char **paths = (char **)list_to_array(plist);
@ -265,19 +264,22 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
    int coco = 0;
    int imagenet = 0;
    if(0==strcmp(type, "coco")){
-        snprintf(buff, 1024, "%s/coco_results.json", prefix);
+        if(!outfile) outfile = "coco_results";
        snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
        fp = fopen(buff, "w");
        fprintf(fp, "[\n");
        coco = 1;
    } else if(0==strcmp(type, "imagenet")){
-        snprintf(buff, 1024, "%s/imagenet-detection.txt", prefix);
+        if(!outfile) outfile = "imagenet-detection";
        snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
        fp = fopen(buff, "w");
        imagenet = 1;
        classes = 200;
    } else {
        if(!outfile) outfile = "comp4_det_test_";
        fps = calloc(classes, sizeof(FILE *));
        for(j = 0; j < classes; ++j){
-            snprintf(buff, 1024, "%s/%s%s.txt", prefix, base, names[j]);
+            snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
            fps[j] = fopen(buff, "w");
        }
    }
@ -333,7 +335,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
-            get_region_boxes(l, w, h, thresh, probs, boxes, 0, map);
+            get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5);
            if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
            if (coco){
                print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@ -397,7 +399,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
        image sized = resize_image(orig, net.w, net.h);
        char *id = basecfg(path);
        network_predict(net, sized.data);
-        get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0);
+        get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0, .5);
        if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);
        char labelpath[4096];
@ -436,7 +438,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
    }
 }
-void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh)
+void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh)
 {
    list *options = read_data_cfg(datacfg);
    char *name_list = option_find_str(options, "names", "data/names.list");
@ -470,14 +472,15 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
        box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
        float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
-        for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
+        for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *));
        float *X = sized.data;
        time=clock();
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
-        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0);
+        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);
-        if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
+        if (l.softmax_tree && nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
        save_image(im, "predictions");
        show_image(im, "predictions");
@ -498,6 +501,7 @@ void run_detector(int argc, char **argv)
 {
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .24);
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    if(argc < 4){
@ -505,6 +509,7 @@ void run_detector(int argc, char **argv)
        return;
    }
    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    char *outfile = find_char_arg(argc, argv, "-out", 0);
    int *gpus = 0;
    int gpu = 0;
    int ngpus = 0;
@ -533,15 +538,15 @@ void run_detector(int argc, char **argv)
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
-    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh);
+    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh);
    else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
-    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights);
+    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
    else if(0==strcmp(argv[2], "demo")) {
        list *options = read_data_cfg(datacfg);
        int classes = option_find_int(options, "classes", 20);
        char *name_list = option_find_str(options, "names", "data/names.list");
        char **names = get_labels(name_list);
-        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix);
+        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh);
    }
 }
--- a/src/layer.c
+++ b/src/layer.c
@ -11,34 +11,88 @@ void free_layer(layer l)
 #endif
        return;
    }
-    if(l.indexes)        free(l.indexes);
+    if(l.cweights)           free(l.cweights);
-    if(l.rand)           free(l.rand);
+    if(l.indexes)            free(l.indexes);
-    if(l.cost)           free(l.cost);
+    if(l.input_layers)       free(l.input_layers);
-    if(l.biases)         free(l.biases);
+    if(l.input_sizes)        free(l.input_sizes);
-    if(l.bias_updates)   free(l.bias_updates);
+    if(l.map)                free(l.map);
-    if(l.weights)        free(l.weights);
+    if(l.rand)               free(l.rand);
-    if(l.weight_updates) free(l.weight_updates);
+    if(l.cost)               free(l.cost);
-    if(l.col_image)      free(l.col_image);
+    if(l.state)              free(l.state);
-    if(l.input_layers)   free(l.input_layers);
+    if(l.prev_state)         free(l.prev_state);
-    if(l.input_sizes)    free(l.input_sizes);
+    if(l.forgot_state)       free(l.forgot_state);
-    if(l.delta)          free(l.delta);
+    if(l.forgot_delta)       free(l.forgot_delta);
-    if(l.output)         free(l.output);
+    if(l.state_delta)        free(l.state_delta);
-    if(l.squared)        free(l.squared);
+    if(l.concat)             free(l.concat);
-    if(l.norms)          free(l.norms);
+    if(l.concat_delta)       free(l.concat_delta);
    if(l.binary_weights)     free(l.binary_weights);
    if(l.biases)             free(l.biases);
    if(l.bias_updates)       free(l.bias_updates);
    if(l.scales)             free(l.scales);
    if(l.scale_updates)      free(l.scale_updates);
    if(l.weights)            free(l.weights);
    if(l.weight_updates)     free(l.weight_updates);
    if(l.col_image)          free(l.col_image);
    if(l.delta)              free(l.delta);
    if(l.output)             free(l.output);
    if(l.squared)            free(l.squared);
    if(l.norms)              free(l.norms);
    if(l.spatial_mean)       free(l.spatial_mean);
    if(l.mean)               free(l.mean);
    if(l.variance)           free(l.variance);
    if(l.mean_delta)         free(l.mean_delta);
    if(l.variance_delta)     free(l.variance_delta);
    if(l.rolling_mean)       free(l.rolling_mean);
    if(l.rolling_variance)   free(l.rolling_variance);
    if(l.x)                  free(l.x);
    if(l.x_norm)             free(l.x_norm);
    if(l.m)                  free(l.m);
    if(l.v)                  free(l.v);
    if(l.z_cpu)              free(l.z_cpu);
    if(l.r_cpu)              free(l.r_cpu);
    if(l.h_cpu)              free(l.h_cpu);
    if(l.binary_input)       free(l.binary_input);
 #ifdef GPU
-    if(l.indexes_gpu)          cuda_free((float *)l.indexes_gpu);
+    if(l.indexes_gpu)           cuda_free((float *)l.indexes_gpu);
-    if(l.weights_gpu)          cuda_free(l.weights_gpu);
+
-    if(l.weight_updates_gpu)   cuda_free(l.weight_updates_gpu);
+    if(l.z_gpu)                   cuda_free(l.z_gpu);
-    if(l.col_image_gpu)        cuda_free(l.col_image_gpu);
+    if(l.r_gpu)                   cuda_free(l.r_gpu);
-    if(l.weights_gpu)          cuda_free(l.weights_gpu);
+    if(l.h_gpu)                   cuda_free(l.h_gpu);
-    if(l.biases_gpu)           cuda_free(l.biases_gpu);
+    if(l.m_gpu)                   cuda_free(l.m_gpu);
-    if(l.weight_updates_gpu)   cuda_free(l.weight_updates_gpu);
+    if(l.v_gpu)                   cuda_free(l.v_gpu);
-    if(l.bias_updates_gpu)     cuda_free(l.bias_updates_gpu);
+    if(l.prev_state_gpu)          cuda_free(l.prev_state_gpu);
-    if(l.output_gpu)           cuda_free(l.output_gpu);
+    if(l.forgot_state_gpu)        cuda_free(l.forgot_state_gpu);
-    if(l.delta_gpu)            cuda_free(l.delta_gpu);
+    if(l.forgot_delta_gpu)        cuda_free(l.forgot_delta_gpu);
-    if(l.rand_gpu)             cuda_free(l.rand_gpu);
+    if(l.state_gpu)               cuda_free(l.state_gpu);
-    if(l.squared_gpu)          cuda_free(l.squared_gpu);
+    if(l.state_delta_gpu)         cuda_free(l.state_delta_gpu);
-    if(l.norms_gpu)            cuda_free(l.norms_gpu);
+    if(l.gate_gpu)                cuda_free(l.gate_gpu);
    if(l.gate_delta_gpu)          cuda_free(l.gate_delta_gpu);
    if(l.save_gpu)                cuda_free(l.save_gpu);
    if(l.save_delta_gpu)          cuda_free(l.save_delta_gpu);
    if(l.concat_gpu)              cuda_free(l.concat_gpu);
    if(l.concat_delta_gpu)        cuda_free(l.concat_delta_gpu);
    if(l.binary_input_gpu)        cuda_free(l.binary_input_gpu);
    if(l.binary_weights_gpu)      cuda_free(l.binary_weights_gpu);
    if(l.mean_gpu)                cuda_free(l.mean_gpu);
    if(l.variance_gpu)            cuda_free(l.variance_gpu);
    if(l.rolling_mean_gpu)        cuda_free(l.rolling_mean_gpu);
    if(l.rolling_variance_gpu)    cuda_free(l.rolling_variance_gpu);
    if(l.variance_delta_gpu)      cuda_free(l.variance_delta_gpu);
    if(l.mean_delta_gpu)          cuda_free(l.mean_delta_gpu);
    if(l.col_image_gpu)           cuda_free(l.col_image_gpu);
    if(l.x_gpu)                   cuda_free(l.x_gpu);
    if(l.x_norm_gpu)              cuda_free(l.x_norm_gpu);
    if(l.weights_gpu)             cuda_free(l.weights_gpu);
    if(l.weight_updates_gpu)      cuda_free(l.weight_updates_gpu);
    if(l.biases_gpu)              cuda_free(l.biases_gpu);
    if(l.bias_updates_gpu)        cuda_free(l.bias_updates_gpu);
    if(l.scales_gpu)              cuda_free(l.scales_gpu);
    if(l.scale_updates_gpu)       cuda_free(l.scale_updates_gpu);
    if(l.output_gpu)              cuda_free(l.output_gpu);
    if(l.delta_gpu)               cuda_free(l.delta_gpu);
    if(l.rand_gpu)                cuda_free(l.rand_gpu);
    if(l.squared_gpu)             cuda_free(l.squared_gpu);
    if(l.norms_gpu)               cuda_free(l.norms_gpu);
 #endif
 }
--- a/src/layer.h
+++ b/src/layer.h
@ -99,14 +99,7 @@ struct layer{
    float B1;
    float B2;
    float eps;
    float *m_gpu;
    float *v_gpu;
    int t;
    float *m;
    float *v;
    tree *softmax_tree;
    int  *map;
    float alpha;
    float beta;
@ -129,33 +122,34 @@ struct layer{
    float probability;
    float scale;
-    int *indexes;
+    char  * cweights;
-    float *rand;
+    int   * indexes;
    float *cost;
    char  *cweights;
    float *state;
    float *prev_state;
    float *forgot_state;
    float *forgot_delta;
    float *state_delta;
    float *concat;
    float *concat_delta;
    float *binary_weights;
    float *biases;
    float *bias_updates;
    float *scales;
    float *scale_updates;
    float *weights;
    float *weight_updates;
    float *col_image;
    int   * input_layers;
    int   * input_sizes;
    int   * map;
    float * rand;
    float * cost;
    float * state;
    float * prev_state;
    float * forgot_state;
    float * forgot_delta;
    float * state_delta;
    float * concat;
    float * concat_delta;
    float * binary_weights;
    float * biases;
    float * bias_updates;
    float * scales;
    float * scale_updates;
    float * weights;
    float * weight_updates;
    float * col_image;
    float * delta;
    float * output;
    float * squared;
@ -174,6 +168,15 @@ struct layer{
    float * x;
    float * x_norm;
    float * m;
    float * v;
    float * z_cpu;
    float * r_cpu;
    float * h_cpu;
    float * binary_input;
    struct layer *input_layer;
    struct layer *self_layer;
    struct layer *output_layer;
@ -194,20 +197,20 @@ struct layer{
    struct layer *input_h_layer;
    struct layer *state_h_layer;
-    float *z_cpu;
+    tree *softmax_tree;
    float *r_cpu;
    float *h_cpu;
    float *binary_input;
    size_t workspace_size;
    #ifdef GPU
    int *indexes_gpu;
    float *z_gpu;
    float *r_gpu;
    float *h_gpu;
-    int *indexes_gpu;
+    float *m_gpu;
    float *v_gpu;
    float * prev_state_gpu;
    float * forgot_state_gpu;
    float * forgot_delta_gpu;
--- a/src/parser.c
+++ b/src/parser.c
@ -826,7 +826,7 @@ void save_weights_upto(network net, char *filename, int cutoff)
    }
 #endif
    fprintf(stderr, "Saving weights to %s\n", filename);
-    FILE *fp = fopen(filename, "w");
+    FILE *fp = fopen(filename, "wb");
    if(!fp) file_error(filename);
    int major = 0;
--- a/src/region_layer.c
+++ b/src/region_layer.c
@ -9,11 +9,9 @@
 #include <string.h>
 #include <stdlib.h>
-#define DOABS 1
+layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
 region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
 {
-    region_layer l = {0};
+    layer l = {0};
    l.type = REGION;
    l.n = n;
@ -75,12 +73,8 @@ box get_region_box(float *x, float *biases, int n, int index, int i, int j, int
    box b;
    b.x = (i + logistic_activate(x[index + 0])) / w;
    b.y = (j + logistic_activate(x[index + 1])) / h;
-    b.w = exp(x[index + 2]) * biases[2*n];
+    b.w = exp(x[index + 2]) * biases[2*n]   / w;
-    b.h = exp(x[index + 3]) * biases[2*n+1];
+    b.h = exp(x[index + 3]) * biases[2*n+1] / h;
    if(DOABS){
        b.w = exp(x[index + 2]) * biases[2*n]   / w;
        b.h = exp(x[index + 3]) * biases[2*n+1] / h;
    }
    return b;
 }
@ -91,12 +85,8 @@ float delta_region_box(box truth, float *x, float *biases, int n, int index, int
    float tx = (truth.x*w - i);
    float ty = (truth.y*h - j);
-    float tw = log(truth.w / biases[2*n]);
+    float tw = log(truth.w*w / biases[2*n]);
-    float th = log(truth.h / biases[2*n + 1]);
+    float th = log(truth.h*h / biases[2*n + 1]);
    if(DOABS){
        tw = log(truth.w*w / biases[2*n]);
        th = log(truth.h*h / biases[2*n + 1]);
    }
    delta[index + 0] = scale * (tx - logistic_activate(x[index + 0])) * logistic_gradient(logistic_activate(x[index + 0]));
    delta[index + 1] = scale * (ty - logistic_activate(x[index + 1])) * logistic_gradient(logistic_activate(x[index + 1]));
@ -141,14 +131,14 @@ float tisnan(float x)
 }
 void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output);
-void forward_region_layer(const region_layer l, network_state state)
+void forward_region_layer(const layer l, network_state state)
 {
    int i,j,b,t,n;
    int size = l.coords + l.classes + 1;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
-    #ifndef GPU
+#ifndef GPU
    flatten(l.output, l.w*l.h, size*l.n, l.batch, 1);
-    #endif
+#endif
    for (b = 0; b < l.batch; ++b){
        for(i = 0; i < l.h*l.w*l.n; ++i){
            int index = size*i + b*l.outputs;
@ -197,6 +187,7 @@ void forward_region_layer(const region_layer l, network_state state)
                    for(n = 0; n < l.n*l.w*l.h; ++n){
                        int index = size*n + b*l.outputs + 5;
                        float scale =  l.output[index-1];
                        l.delta[index - 1] = l.noobject_scale * ((0 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
                        float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class);
                        if(p > maxp){
                            maxp = p;
@ -205,6 +196,8 @@ void forward_region_layer(const region_layer l, network_state state)
                    }
                    int index = size*maxi + b*l.outputs + 5;
                    delta_region_class(l.output, l.delta, index, class, l.classes, l.softmax_tree, l.class_scale, &avg_cat);
                    if(l.output[index - 1] < .3) l.delta[index - 1] = l.object_scale * ((.3 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
                    else  l.delta[index - 1] = 0;
                    ++class_count;
                    onlyclass = 1;
                    break;
@ -218,39 +211,26 @@ void forward_region_layer(const region_layer l, network_state state)
                    int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
                    box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
                    float best_iou = 0;
                    int best_class = -1;
                    for(t = 0; t < 30; ++t){
                        box truth = float_to_box(state.truth + t*5 + b*l.truths);
                        if(!truth.x) break;
                        float iou = box_iou(pred, truth);
                        if (iou > best_iou) {
                            best_class = state.truth[t*5 + b*l.truths + 4];
                            best_iou = iou;
                        }
                    }
                    avg_anyobj += l.output[index + 4];
                    l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
-                    if(l.classfix == -1) l.delta[index + 4] = l.noobject_scale * ((best_iou - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
+                    if (best_iou > l.thresh) {
-                    else{
+                        l.delta[index + 4] = 0;
                        if (best_iou > l.thresh) {
                            l.delta[index + 4] = 0;
                            if(l.classfix > 0){
                                delta_region_class(l.output, l.delta, index + 5, best_class, l.classes, l.softmax_tree, l.class_scale*(l.classfix == 2 ? l.output[index + 4] : 1), &avg_cat);
                                ++class_count;
                            }
                        }
                    }
                    if(*(state.net.seen) < 12800){
                        box truth = {0};
                        truth.x = (i + .5)/l.w;
                        truth.y = (j + .5)/l.h;
-                        truth.w = l.biases[2*n];
+                        truth.w = l.biases[2*n]/l.w;
-                        truth.h = l.biases[2*n+1];
+                        truth.h = l.biases[2*n+1]/l.h;
                        if(DOABS){
                            truth.w = l.biases[2*n]/l.w;
                            truth.h = l.biases[2*n+1]/l.h;
                        }
                        delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01);
                    }
                }
@ -274,12 +254,8 @@ void forward_region_layer(const region_layer l, network_state state)
                int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
                box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
                if(l.bias_match){
-                    pred.w = l.biases[2*n];
+                    pred.w = l.biases[2*n]/l.w;
-                    pred.h = l.biases[2*n+1];
+                    pred.h = l.biases[2*n+1]/l.h;
                    if(DOABS){
                        pred.w = l.biases[2*n]/l.w;
                        pred.h = l.biases[2*n+1]/l.h;
                    }
                }
                //printf("pred: (%f, %f) %f x %f\n", pred.x, pred.y, pred.w, pred.h);
                pred.x = 0;
@ -313,19 +289,19 @@ void forward_region_layer(const region_layer l, network_state state)
        }
    }
    //printf("\n");
-    #ifndef GPU
+#ifndef GPU
    flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0);
-    #endif
+#endif
    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f,  count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
 }
-void backward_region_layer(const region_layer l, network_state state)
+void backward_region_layer(const layer l, network_state state)
 {
    /* CPU backward pass: passes this layer's l.delta (l.batch*l.inputs floats)
     * and state.delta to axpy_cpu with a scale of 1. Presumably this adds the
     * layer's delta into the upstream delta buffer (BLAS-style y += a*x) --
     * confirm against axpy_cpu. */
    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
 }
-void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map)
+void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh)
 {
    int i,j,n;
    float *predictions = l.output;
@ -336,7 +312,6 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
            int index = i*l.n + n;
            int p_index = index * (l.classes + 5) + 4;
            float scale = predictions[p_index];
            if(l.classfix == -1 && scale < .5) scale = 0;
            int box_index = index * (l.classes + 5);
            boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h);
            boxes[index].x *= w;
@ -348,22 +323,15 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
            if(l.softmax_tree){
                hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);
                int found = 0;
                if(map){
                    for(j = 0; j < 200; ++j){
                        float prob = scale*predictions[class_index+map[j]];
                        probs[index][j] = (prob > thresh) ? prob : 0;
                    }
                } else {
-                    for(j = l.classes - 1; j >= 0; --j){
+                    int j =  hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh);
-                        if(!found && predictions[class_index + j] > .5){
+                    probs[index][j] = (scale > thresh) ? scale : 0;
-                            found = 1;
+                    probs[index][l.classes] = scale;
                        } else {
                            predictions[class_index + j] = 0;
                        }
                        float prob = predictions[class_index+j];
                        probs[index][j] = (scale > thresh) ? prob : 0;
                    }
                }
            } else {
                for(j = 0; j < l.classes; ++j){
@ -380,7 +348,7 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
 #ifdef GPU
-void forward_region_layer_gpu(const region_layer l, network_state state)
+void forward_region_layer_gpu(const layer l, network_state state)
 {
    /*
       if(!state.train){
@ -421,7 +389,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state)
    if(cpu_state.truth) free(cpu_state.truth);
 }
-void backward_region_layer_gpu(region_layer l, network_state state)
+void backward_region_layer_gpu(layer l, network_state state)
 {
    /* GPU backward pass: calls flatten_ongpu on l.delta_gpu with layout
     * (l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch), direction flag 0 and
     * state.delta as the last argument. NOTE(review): presumably this undoes
     * the forward-pass flatten while writing into state.delta -- confirm
     * against flatten_ongpu. */
    flatten_ongpu(l.delta_gpu, l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch, 0, state.delta);
 }
--- a/src/region_layer.h
+++ b/src/region_layer.h
@ -4,17 +4,15 @@
 #include "layer.h"
 #include "network.h"
-typedef layer region_layer;
+layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
-
+void forward_region_layer(const layer l, network_state state);
-region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
+void backward_region_layer(const layer l, network_state state);
-void forward_region_layer(const region_layer l, network_state state);
+void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh);
 void backward_region_layer(const region_layer l, network_state state);
 void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map);
 void resize_region_layer(layer *l, int w, int h);
 #ifdef GPU
-void forward_region_layer_gpu(const region_layer l, network_state state);
+void forward_region_layer_gpu(const layer l, network_state state);
-void backward_region_layer_gpu(region_layer l, network_state state);
+void backward_region_layer_gpu(layer l, network_state state);
 #endif
 #endif
--- a/src/server.c
+++ b/src/server.c
@ -1,205 +0,0 @@
 #include <stdio.h> /* needed for sockaddr_in */
 #include <string.h> /* needed for sockaddr_in */
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h> /* needed for sockaddr_in */
 #include <netdb.h>
 #include <pthread.h>
 #include <time.h>
 #include "mini_blas.h"
 #include "utils.h"
 #include "parser.h"
 #include "server.h"
 #include "connected_layer.h"
 #include "convolutional_layer.h"
 #define SERVER_PORT 9423
 #define STR(x) #x
 /*
  * Creates the IPv4 stream socket used by server_update and client_update.
  *
  * server: when 1, the socket is additionally bound to INADDR_ANY on
  *         SERVER_PORT; for any other value the socket is left unbound.
  *
  * Returns the socket descriptor. Failures are reported through error().
  * NOTE(review): if error() returns instead of terminating, the invalid or
  * unbound descriptor is still handed back to the caller -- confirm.
  */
 int socket_setup(int server)
 {
    int fd = 0;                         /* our socket */
    struct sockaddr_in me;      /* our address */
    /* create a stream (TCP) socket: SOCK_STREAM, not a UDP datagram socket */
    if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        error("cannot create socket");
    }
    /* bind the socket to any valid IP address and a specific port */
    if (server == 1){
        bzero((char *) &me, sizeof(me));
        me.sin_family = AF_INET;
        me.sin_addr.s_addr = htonl(INADDR_ANY);
        me.sin_port = htons(SERVER_PORT);
        if (bind(fd, (struct sockaddr *)&me, sizeof(me)) < 0) {
            error("bind failed");
        }
    }
    return fd;
 }
 /* Arguments for one handle_connection worker. server_update heap-allocates
  * an instance per accepted connection; the worker copies it and frees it. */
 typedef struct{
    int fd;         /* accepted client socket */
    int counter;    /* server_update's connection counter when the client was accepted */
    network net;    /* by-value copy of the network struct passed to server_update */
 } connection_info;
 /*
  * Reads n floats from fd into a temporary buffer and folds them into
  * a[0..n-1] through axpy_cpu with a scale of 1.
  *
  * fd: descriptor to read from (n*sizeof(float) bytes are requested).
  * a:  destination array of at least n floats.
  * n:  number of floats; n <= 0 is a no-op.
  *
  * If the temporary buffer cannot be allocated the function reports the
  * failure and returns without touching fd or a, instead of handing a NULL
  * buffer to read_all(). The bytes for this array are then left unread.
  */
 void read_and_add_into(int fd, float *a, int n)
 {
    if(n <= 0) return;
    float *buff = calloc(n, sizeof(float));
    if(!buff){
        perror("read_and_add_into: calloc");
        return;
    }
    read_all(fd, (char*) buff, n*sizeof(float));
    axpy_cpu(n, 1, buff, 1, a, 1);
    free(buff);
 }
 /*
  * Worker for one client connection (started by server_update).
  *
  * pointer: heap-allocated connection_info; it is copied and freed here, so
  *          the caller must not touch it after handing it over.
  *
  * Protocol, in layer order and for CONVOLUTIONAL / CONNECTED layers only:
  *   1. read the client's bias and filter/weight updates and add them into
  *      the server's update buffers (read_and_add_into);
  *   2. run the layer update and write the resulting biases and
  *      filters/weights back to the client.
  * client_update performs the mirror-image writes and reads in the same order.
  *
  * NOTE(review): every worker operates on the same layer buffers and no
  * locking is visible in this function -- confirm concurrent updates are
  * intended.
  */
 void handle_connection(void *pointer)
 {
    /* take a private copy of the arguments, then release the caller's allocation */
    connection_info info = *(connection_info *) pointer;
    free(pointer);
    //printf("New Connection\n");
    /* every 100th connection: snapshot the network before applying this client's updates */
    if(info.counter%100==0){
        char buff[256];
        sprintf(buff, "unikitty/net_%d.part", info.counter);
        save_network(info.net, buff);
    }
    int fd = info.fd;
    network net = info.net;
    int i;
    /* phase 1: receive and accumulate the client's updates */
    for(i = 0; i < net.n; ++i){
        if(net.types[i] == CONVOLUTIONAL){
            /* struct copy; its pointer members still refer to the shared buffers */
            convolutional_layer layer = *(convolutional_layer *) net.layers[i];
            read_and_add_into(fd, layer.bias_updates, layer.n);
            int num = layer.n*layer.c*layer.size*layer.size;
            read_and_add_into(fd, layer.filter_updates, num);
        }
        if(net.types[i] == CONNECTED){
            connected_layer layer = *(connected_layer *) net.layers[i];
            read_and_add_into(fd, layer.bias_updates, layer.outputs);
            read_and_add_into(fd, layer.weight_updates, layer.inputs*layer.outputs);
        }
    }
    /* phase 2: apply the updates and send the current parameters back */
    for(i = 0; i < net.n; ++i){
        if(net.types[i] == CONVOLUTIONAL){
            convolutional_layer layer = *(convolutional_layer *) net.layers[i];
            update_convolutional_layer(layer);
            write_all(fd, (char*) layer.biases, layer.n*sizeof(float));
            int num = layer.n*layer.c*layer.size*layer.size;
            write_all(fd, (char*) layer.filters, num*sizeof(float));
        }
        if(net.types[i] == CONNECTED){
            connected_layer layer = *(connected_layer *) net.layers[i];
            update_connected_layer(layer);
            write_all(fd, (char *)layer.biases, layer.outputs*sizeof(float));
            write_all(fd, (char *)layer.weights, layer.outputs*layer.inputs*sizeof(float));
        }
    }
    //printf("Received updates\n");
    close(fd);
 }
 /*
  * Server accept loop: binds the listening socket, then starts one
  * handle_connection worker thread per accepted client. Each worker receives
  * a heap-allocated connection_info (which it frees itself) carrying the
  * client socket, the current connection counter and a copy of net.
  *
  * net: network whose layers the workers update and serve.
  *
  * Runs until the process is terminated; it returns early only if listen()
  * fails. A failed accept(), allocation or thread creation is reported and
  * that connection is dropped; the server keeps accepting. The counter only
  * advances for connections that were actually dispatched.
  */
 void server_update(network net)
 {
    int fd = socket_setup(1);
    int counter = 18000;
    struct sockaddr_in client;     /* remote address */
    if(listen(fd, 64) < 0){
        perror("listen");
        close(fd);
        return;
    }
    while(1){
        /* accept() uses the length as an in/out argument, so reset it each time */
        socklen_t client_size = sizeof(client);   /* length of addresses */
        int connection = accept(fd, (struct sockaddr *) &client, &client_size);
        if(connection < 0){
            /* never hand an invalid descriptor to a worker */
            perror("accept");
            continue;
        }
        connection_info *info = calloc(1, sizeof(connection_info));
        if(!info){
            perror("calloc");
            close(connection);
            continue;
        }
        info->net = net;
        info->counter = counter;
        info->fd = connection;
        pthread_t worker;
        if(pthread_create(&worker, NULL, (void *) &handle_connection, info)){
            fprintf(stderr, "could not start worker thread\n");
            close(connection);
            free(info);
            continue;
        }
        /* workers are never joined; detach so their resources are reclaimed on exit */
        pthread_detach(worker);
        ++counter;
        printf("%d\n", counter);
        //if(counter == 1024) break;
    }
    close(fd);
 }
 /*
  * Client side of the update exchange with the server at `address`.
  *
  * For every CONVOLUTIONAL and CONNECTED layer, in layer order, it first
  * sends the locally accumulated bias and filter/weight updates and zeroes
  * them, then reads back the server's current biases and filters/weights
  * (pushing them to the GPU when built with GPU).
  *
  * net:     network whose update buffers are sent and whose parameters are
  *          overwritten with the server's reply.
  * address: host name or address of the server, resolved with gethostbyname.
  *
  * If the host cannot be resolved the failure is reported, the socket is
  * closed and the function returns without modifying net.
  */
 void client_update(network net, char *address)
 {
    int fd = socket_setup(0);
    struct hostent *hp;     /* host information */
    struct sockaddr_in server;    /* server address */
    /* fill in the server's address and data */
    bzero((char*)&server, sizeof(server));
    server.sin_family = AF_INET;
    server.sin_port = htons(SERVER_PORT);
    /* look up the address of the server given its name */
    hp = gethostbyname(address);
    if (!hp) {
        perror("no such host");
        fprintf(stderr, "could not obtain address of %s\n", address);
        /* hp is NULL: bail out instead of dereferencing it below */
        close(fd);
        return;
    }
    /* put the host's address into the server address structure */
    memcpy((void *)&server.sin_addr, hp->h_addr_list[0], hp->h_length);
    if (connect(fd, (struct sockaddr *) &server, sizeof(server)) < 0) {
        error("error connecting");
    }
    /* send a message to the server */
    int i;
    //printf("Sending\n");
    for(i = 0; i < net.n; ++i){
        if(net.types[i] == CONVOLUTIONAL){
            convolutional_layer layer = *(convolutional_layer *) net.layers[i];
            write_all(fd, (char*) layer.bias_updates, layer.n*sizeof(float));
            int num = layer.n*layer.c*layer.size*layer.size;
            write_all(fd, (char*) layer.filter_updates, num*sizeof(float));
            /* the updates have been handed to the server; start accumulating afresh */
            memset(layer.bias_updates, 0, layer.n*sizeof(float));
            memset(layer.filter_updates, 0, num*sizeof(float));
        }
        if(net.types[i] == CONNECTED){
            connected_layer layer = *(connected_layer *) net.layers[i];
            write_all(fd, (char *)layer.bias_updates, layer.outputs*sizeof(float));
            write_all(fd, (char *)layer.weight_updates, layer.outputs*layer.inputs*sizeof(float));
            memset(layer.bias_updates, 0, layer.outputs*sizeof(float));
            memset(layer.weight_updates, 0, layer.inputs*layer.outputs*sizeof(float));
        }
    }
    //printf("Sent\n");
    /* receive the server's parameters in the same layer order */
    for(i = 0; i < net.n; ++i){
        if(net.types[i] == CONVOLUTIONAL){
            convolutional_layer layer = *(convolutional_layer *) net.layers[i];
            read_all(fd, (char*) layer.biases, layer.n*sizeof(float));
            int num = layer.n*layer.c*layer.size*layer.size;
            read_all(fd, (char*) layer.filters, num*sizeof(float));
 #ifdef GPU
            push_convolutional_layer(layer);
 #endif
        }
        if(net.types[i] == CONNECTED){
            connected_layer layer = *(connected_layer *) net.layers[i];
            read_all(fd, (char *)layer.biases, layer.outputs*sizeof(float));
            read_all(fd, (char *)layer.weights, layer.outputs*layer.inputs*sizeof(float));
 #ifdef GPU
            push_connected_layer(layer);
 #endif
        }
    }
    //printf("Updated\n");
    close(fd);
 }
--- a/src/server.h
+++ b/src/server.h
@ -1,4 +0,0 @@
 #include "network.h"
 void client_update(network net, char *address);
 void server_update(network net);
--- a/src/tree.c
+++ b/src/tree.c
@ -50,6 +50,34 @@ void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leave
    }
 }
 /*
  * Greedy top-down walk of the class hierarchy.
  *
  * Starting at group 0, picks the highest-scoring entry of the current group,
  * multiplies its score into the running probability p, and descends into
  * that entry's child group for as long as the running probability stays
  * above thresh.
  *
  * predictions: per-node scores, indexed like the hierarchy's nodes.
  * hier:        hierarchy providing group_size, group_offset, child, parent.
  * thresh:      minimum running probability required to descend.
  *
  * Returns the index of:
  *   - the chosen entry, when it has no child group (child < 0);
  *   - the parent of the current group, when no entry of a non-starting
  *     group keeps the running probability above thresh;
  *   - the best entry of the starting group, when even that one fails the
  *     threshold. The starting group's entries have no selected ancestor, and
  *     their parent may be negative (read_tree only links a child group when
  *     parent >= 0), so returning the parent here would give callers an
  *     invalid array index.
  */
 int hierarchy_top_prediction(float *predictions, tree *hier, float thresh)
 {
    float p = 1;
    int group = 0;
    int i;
    while(1){
        float max = 0;
        int max_i = 0;
        for(i = 0; i < hier->group_size[group]; ++i){
            int index = i + hier->group_offset[group];
            float val = predictions[index];
            if(val > max){
                max_i = index;
                max = val;
            }
        }
        if(p*max > thresh){
            p = p*max;
            group = hier->child[max_i];
            if(group < 0) return max_i;
        } else if(group == 0){
            return max_i;
        } else {
            return hier->parent[hier->group_offset[group]];
        }
    }
    return 0;
 }
 tree *read_tree(char *filename)
 {
    tree t = {0};
@ -67,6 +95,9 @@ tree *read_tree(char *filename)
        t.parent = realloc(t.parent, (n+1)*sizeof(int));
        t.parent[n] = parent;
        t.child = realloc(t.child, (n+1)*sizeof(int));
        t.child[n] = -1;
        t.name = realloc(t.name, (n+1)*sizeof(char *));
        t.name[n] = id;
        if(parent != last_parent){
@ -80,6 +111,9 @@ tree *read_tree(char *filename)
        }
        t.group = realloc(t.group, (n+1)*sizeof(int));
        t.group[n] = groups;
        if (parent >= 0) {
            t.child[parent] = groups;
        }
        ++n;
        ++group_size;
    }
--- a/src/tree.h
+++ b/src/tree.h
@ -5,6 +5,7 @@ typedef struct{
    int *leaf;
    int n;
    int *parent;
    int *child;
    int *group;
    char **name;
@ -16,6 +17,7 @@ typedef struct{
 tree *read_tree(char *filename);
 void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves);
 void change_leaves(tree *t, char *leaf_list);
 int hierarchy_top_prediction(float *predictions, tree *hier, float thresh);
 float get_hierarchy_probability(float *x, tree *hier, int c);
 #endif
--- a/src/yolo.c
+++ b/src/yolo.c
@ -351,5 +351,5 @@ void run_yolo(int argc, char **argv)
    else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights);
    else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
    else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
-    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix);
+    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, .5);
 }
--- a/src/yolo_kernels.cu
+++ b/src/yolo_kernels.cu
@ -1,132 +0,0 @@
 #include "cuda_runtime.h"
 #include "curand.h"
 #include "cublas_v2.h"
 extern "C" {
 #include "network.h"
 #include "detection_layer.h"
 #include "cost_layer.h"
 #include "utils.h"
 #include "parser.h"
 #include "box.h"
 #include "image.h"
 #include <sys/time.h>
 }
 #ifdef OPENCV
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 extern "C" image ipl_to_image(IplImage* src);
 extern "C" void convert_yolo_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
 extern "C" char *voc_names[];
 extern "C" image voc_labels[];
 static float **probs;
 static box *boxes;
 static network net;
 static image in   ;
 static image in_s ;
 static image det  ;
 static image det_s;
 static image disp ;
 static cv::VideoCapture cap;
 static float fps = 0;
 static float demo_thresh = 0;
 /*
  * Thread entry point: grabs the next frame from the global capture `cap` and
  * publishes it through the globals `in` (converted frame) and `in_s` (the
  * same frame resized to net.w x net.h).
  *
  * ptr: unused. Always returns 0.
  *
  * NOTE(review): the frame is not checked for being empty before conversion --
  * confirm behaviour when the camera stops delivering frames.
  */
 void *fetch_in_thread(void *ptr)
 {
    cv::Mat frame_m;
    cap >> frame_m;
    /* wrap the Mat as an IplImage so the C helper ipl_to_image can consume it */
    IplImage frame = frame_m;
    in = ipl_to_image(&frame);
    /* presumably swaps OpenCV's BGR channel order to RGB -- confirm against rgbgr_image */
    rgbgr_image(in);
    in_s = resize_image(in, net.w, net.h);
    return 0;
 }
 /*
  * Thread entry point: runs the network on the global resized frame `det_s`,
  * converts the output into the global `boxes` / `probs` arrays, optionally
  * applies non-maximum suppression, prints the FPS banner and draws the
  * detections onto the global full-size frame `det`.
  *
  * ptr: unused. Always returns 0.
  * Side effect: frees det_s once the prediction has been computed.
  */
 void *detect_in_thread(void *ptr)
 {
    float nms = .4;
    /* the last layer of the network is used as the detection layer */
    detection_layer l = net.layers[net.n-1];
    float *X = det_s.data;
    float *predictions = network_predict(net, X);
    free_image(det_s);
    convert_yolo_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, demo_thresh, probs, boxes, 0);
    if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms);
    /* ANSI escapes: clear the terminal and move the cursor to the top-left corner */
    printf("\033[2J");
    printf("\033[1;1H");
    printf("\nFPS:%.0f\n",fps);
    printf("Objects:\n\n");
    draw_detections(det, l.side*l.side*l.n, demo_thresh, boxes, probs, voc_names, voc_labels, 20);
    return 0;
 }
 /*
  * Live webcam demo: loads the network, opens camera `cam_index`, then loops
  * forever overlapping frame capture (fetch_in_thread) with detection
  * (detect_in_thread) while the previous annotated frame is displayed.
  *
  * cfgfile:    network configuration file.
  * weightfile: weights to load, or NULL to skip loading.
  * thresh:     detection threshold, stored in demo_thresh for the worker.
  * cam_index:  index of the camera passed to cv::VideoCapture.
  *
  * Never returns; frames are handed between stages through the file-scope
  * globals in/in_s (captured), det/det_s (being processed) and disp (shown).
  */
 extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index)
 {
    demo_thresh = thresh;
    printf("YOLO demo\n");
    net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    srand(2222222);
    cv::VideoCapture cam(cam_index);
    cap = cam;
    if(!cap.isOpened()) error("Couldn't connect to webcam.\n");
    detection_layer l = net.layers[net.n-1];
    int j;
    boxes = (box *)calloc(l.side*l.side*l.n, sizeof(box));
    probs = (float **)calloc(l.side*l.side*l.n, sizeof(float *));
    /* NOTE(review): rows hold floats but are sized with sizeof(float *); this
     * over-allocates on 64-bit targets. Left as is because the writers of
     * probs are not visible here -- confirm before shrinking. */
    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *));
    pthread_t fetch_thread;
    pthread_t detect_thread;
    /* prime the pipeline: one frame ready for display, one ready for detection */
    fetch_in_thread(0);
    det = in;
    det_s = in_s;
    fetch_in_thread(0);
    detect_in_thread(0);
    disp = det;
    det = in;
    det_s = in_s;
    cvNamedWindow("YOLO", CV_WINDOW_NORMAL);
    cvMoveWindow("YOLO", 0, 0);
    cvResizeWindow("YOLO", 1352, 1013);
    while(1){
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);
        if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
        if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
        show_image(disp, "YOLO");
        free_image(disp);
        cvWaitKey(1);
        pthread_join(fetch_thread, 0);
        pthread_join(detect_thread, 0);
        /* rotate the pipeline: detected frame -> display, captured frame -> detection */
        disp  = det;
        det   = in;
        det_s = in_s;
        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        /* Use the whole interval (seconds and microseconds). tv_usec alone
         * wraps every second and can be 0, which made the old estimate wrong
         * for slow frames and divided by zero. */
        double elapsed_us = 1000000.0*tval_result.tv_sec + tval_result.tv_usec;
        if(elapsed_us > 0){
            float curr = 1000000.f/elapsed_us;
            fps = .9*fps + .1*curr;
        }
    }
 }
 #else
 /* Fallback used when the file is built without OPENCV: the demo cannot grab
  * webcam frames, so it only reports that on stderr. All arguments are ignored. */
 extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index)
 {
    fputs("YOLO demo needs OpenCV for webcam images.\n", stderr);
 }
 #endif
		`@ -0,0 +1,80 @@`
							`5177`
							`3768`
							`3802`
							`3800`
							`4107`
							`4072`
							`4071`
							`3797`
							`4097`
							`2645`
							`5150`
							`2644`
							`3257`
							`2523`
							`6527`
							`6866`
							`6912`
							`7342`
							`7255`
							`7271`
							`7217`
							`6858`
							`7343`
							`7233`
							`3704`
							`4374`
							`3641`
							`5001`
							`3899`
							`2999`
							`2631`
							`5141`
							`2015`
							`1133`
							`1935`
							`1930`
							`5144`
							`5143`
							`2371`
							`3916`
							`3745`
							`3640`
							`4749`
							`4736`
							`4735`
							`3678`
							`58`
							`42`
							`771`
							`81`
							`152`
							`141`
							`786`
							`700`
							`218`
							`791`
							`2518`
							`2521`
							`3637`
							`2458`
							`2505`
							`2519`
							`3499`
							`2837`
							`3503`
							`2597`
							`3430`
							`2080`
							`5103`
							`5111`
							`5102`
							`3013`
							`5096`
							`1102`
							`3218`
							`4010`
							`2266`
							`1127`
							`5122`
							`2360`
		`@ -0,0 +1,200 @@`
							`2687`
							`4107`
							`8407`
							`7254`
							`42`
							`6797`
							`127`
							`2268`
							`2442`
							`3704`
							`260`
							`1970`
							`58`
							`4443`
							`2661`
							`2043`
							`2039`
							`4858`
							`4007`
							`6858`
							`8408`
							`166`
							`2523`
							`3768`
							`4347`
							`6527`
							`2446`
							`5005`
							`3274`
							`3678`
							`4918`
							`709`
							`4072`
							`8428`
							`7223`
							`2251`
							`3802`
							`3848`
							`7271`
							`2677`
							`8267`
							`2849`
							`2518`
							`2738`
							`3746`
							`5105`
							`3430`
							`3503`
							`2249`
							`1841`
							`2032`
							`2358`
							`122`
							`3984`
							`4865`
							`3246`
							`5095`
							`6912`
							`6878`
							`8467`
							`2741`
							`1973`
							`3057`
							`7217`
							`1872`
							`44`
							`2452`
							`3637`
							`2704`
							`6917`
							`2715`
							`6734`
							`2325`
							`6864`
							`6677`
							`2035`
							`1949`
							`338`
							`2664`
							`5122`
							`1844`
							`784`
							`2223`
							`7188`
							`2719`
							`2670`
							`4830`
							`158`
							`4818`
							`7228`
							`1965`
							`7342`
							`786`
							`2095`
							`8281`
							`8258`
							`7406`
							`3915`
							`8382`
							`2437`
							`2837`
							`82`
							`6871`
							`1876`
							`7447`
							`8285`
							`5007`
							`2740`
							`3463`
							`5103`
							`3755`
							`4910`
							`6809`
							`3800`
							`118`
							`3396`
							`3092`
							`2709`
							`81`
							`7105`
							`4036`
							`2366`
							`1846`
							`5177`
							`2684`
							`64`
							`2041`
							`3919`
							`700`
							`3724`
							`1742`
							`39`
							`807`
							`7184`
							`2256`
							`235`
							`2778`
							`2996`
							`2030`
							`3714`
							`7167`
							`2369`
							`6705`
							`6861`
							`5096`
							`2597`
							`2166`
							`2036`
							`3228`
							`3747`
							`2711`
							`8300`
							`2226`
							`7153`
							`7255`
							`2631`
							`7109`
							`8242`
							`7445`
							`3776`
							`3803`
							`3690`
							`2025`
							`2521`
							`2316`
							`7190`
							`8249`
							`3352`
							`2639`
							`2887`
							`100`
							`4219`
							`3344`
							`5008`
							`7224`
							`3351`
							`2434`
							`2074`
							`2034`
							`8304`
							`5004`
							`6868`
							`5102`
							`2645`
							`4071`
							`2716`
							`2717`
							`7420`
							`3499`
							`3763`
							`5084`
							`2676`
							`2046`
							`5107`
							`5097`
							`3944`
							`4097`
							`7132`
							`3956`
							`7343`