this'll teach me to mess with maxpooling

2023-08-10 21:13:14 +03:00 · 2018-08-03 15:57:48 -07:00 · 2018-08-03 15:57:48 -07:00 · b13f67bfdd
commit b13f67bfdd
parent e209b3bbbf
23 changed files with 737 additions and 130 deletions
--- a/4
+++ b/4
@ -57,8 +57,8 @@ CFLAGS+= -DCUDNN
 LDFLAGS+= -lcudnn
 endif
-OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o  lstm_layer.o l2norm_layer.o yolo_layer.o
+OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o  lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o
-EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o darknet.o
+EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o
 ifeq ($(GPU), 1) 
 LDFLAGS+= -lstdc++ 
 OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o
--- a/examples/art.c
+++ b/examples/art.c
@ -24,7 +24,6 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)
    while(1){
        image in = get_image_from_stream(cap);
        image in_s = resize_image(in, net->w, net->h);
        show_image(in, window);
        float *p = network_predict(net, in_s.data);
@ -45,10 +44,9 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)
        }
        printf("]\n");
        show_image(in, window, 1);
        free_image(in_s);
        free_image(in);
        cvWaitKey(1);
    }
 #endif
 }
--- a/examples/classifier.c
+++ b/examples/classifier.c
@ -645,6 +645,45 @@ void label_classifier(char *datacfg, char *filename, char *weightfile)
    }
 }
 void csv_classifier(char *datacfg, char *cfgfile, char *weightfile)
 {
    int i,j;
    network *net = load_network(cfgfile, weightfile, 0);
    srand(time(0));
    list *options = read_data_cfg(datacfg);
    char *test_list = option_find_str(options, "test", "data/test.list");
    int top = option_find_int(options, "top", 1);
    list *plist = get_paths(test_list);
    char **paths = (char **)list_to_array(plist);
    int m = plist->size;
    free_list(plist);
    int *indexes = calloc(top, sizeof(int));
    for(i = 0; i < m; ++i){
        double time = what_time_is_it_now();
        char *path = paths[i];
        image im = load_image_color(path, 0, 0);
        image r = letterbox_image(im, net->w, net->h);
        float *predictions = network_predict(net, r.data);
        if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
        top_k(predictions, net->outputs, top, indexes);
        printf("%s", path);
        for(j = 0; j < top; ++j){
            printf("\t%d", indexes[j]);
        }
        printf("\n");
        free_image(im);
        free_image(r);
        fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - time, i+1, m);
    }
 }
 void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer)
 {
@ -869,8 +908,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
        }
        if(1){
-            show_image(out, "Threat");
+            show_image(out, "Threat", 10);
            cvWaitKey(10);
        }
        free_image(in_s);
        free_image(in);
@ -922,7 +960,6 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
        image in = get_image_from_stream(cap);
        image in_s = resize_image(in, net->w, net->h);
        show_image(in, "Threat Detection");
        float *predictions = network_predict(net, in_s.data);
        top_predictions(net, top, indexes);
@ -947,11 +984,10 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
            }
        }
        show_image(in, "Threat Detection", 10);
        free_image(in_s);
        free_image(in);
        cvWaitKey(10);
        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        float curr = 1000000.f/((long int)tval_result.tv_usec);
@ -1036,12 +1072,10 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
            free_image(label);
        }
-        show_image(in, base);
+        show_image(in, base, 10);
        free_image(in_s);
        free_image(in);
        cvWaitKey(10);
        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        float curr = 1000000.f/((long int)tval_result.tv_usec);
@ -1080,6 +1114,7 @@ void run_classifier(int argc, char **argv)
    else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
    else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
    else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
    else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights);
    else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
    else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights);
    else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights);
--- a/examples/coco.c
+++ b/examples/coco.c
@ -325,14 +325,10 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
        draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80);
        save_image(im, "prediction");
-        show_image(im, "predictions");
+        show_image(im, "predictions", 0);
        free_detections(dets, nboxes);
        free_image(im);
        free_image(sized);
 #ifdef OPENCV
        cvWaitKey(0);
        cvDestroyAllWindows();
 #endif
        if (filename) break;
    }
 }
--- a/examples/darknet.c
+++ b/examples/darknet.c
@ -14,6 +14,7 @@ extern void run_nightmare(int argc, char **argv);
 extern void run_classifier(int argc, char **argv);
 extern void run_regressor(int argc, char **argv);
 extern void run_segmenter(int argc, char **argv);
 extern void run_isegmenter(int argc, char **argv);
 extern void run_char_rnn(int argc, char **argv);
 extern void run_tag(int argc, char **argv);
 extern void run_cifar(int argc, char **argv);
@ -452,6 +453,8 @@ int main(int argc, char **argv)
        run_classifier(argc, argv);
    } else if (0 == strcmp(argv[1], "regressor")){
        run_regressor(argc, argv);
    } else if (0 == strcmp(argv[1], "isegmenter")){
        run_isegmenter(argc, argv);
    } else if (0 == strcmp(argv[1], "segmenter")){
        run_segmenter(argc, argv);
    } else if (0 == strcmp(argv[1], "art")){
--- a/examples/detector.c
+++ b/examples/detector.c
@ -613,9 +613,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
            if(fullscreen){
                cvSetWindowProperty("predictions", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
            }
-            show_image(im, "predictions");
+            show_image(im, "predictions", 0);
            cvWaitKey(0);
            cvDestroyAllWindows();
 #endif
        }
--- a/examples/instance-segmenter.c
+++ b/examples/instance-segmenter.c
@ -0,0 +1,265 @@
 #include "darknet.h"
 #include <sys/time.h>
 #include <assert.h>
 void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display)
 {
    int i;
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
    network **nets = calloc(ngpus, sizeof(network*));
    srand(time(0));
    int seed = rand();
    for(i = 0; i < ngpus; ++i){
        srand(seed);
 #ifdef GPU
        cuda_set_device(gpus[i]);
 #endif
        nets[i] = load_network(cfgfile, weightfile, clear);
        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
    network *net = nets[0];
    image pred = get_network_image(net);
    int div = net->w/pred.w;
    assert(pred.w * div == net->w);
    assert(pred.h * div == net->h);
    int imgs = net->batch * net->subdivisions * ngpus;
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);
    char *backup_directory = option_find_str(options, "backup", "/backup/");
    char *train_list = option_find_str(options, "train", "data/train.list");
    list *plist = get_paths(train_list);
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;
    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.threads = 32;
    args.scale = div;
    args.num_boxes = 90;
    args.min = net->min_crop;
    args.max = net->max_crop;
    args.angle = net->angle;
    args.aspect = net->aspect;
    args.exposure = net->exposure;
    args.saturation = net->saturation;
    args.hue = net->hue;
    args.size = net->w;
    args.classes = 80;
    args.paths = paths;
    args.n = imgs;
    args.m = N;
    args.type = ISEG_DATA;
    data train;
    data buffer;
    pthread_t load_thread;
    args.d = &buffer;
    load_thread = load_data(args);
    int epoch = (*net->seen)/N;
    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        double time = what_time_is_it_now();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);
        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
        time = what_time_is_it_now();
        float loss = 0;
 #ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            loss = train_networks(nets, ngpus, train, 4);
        }
 #else
        loss = train_network(net, train);
 #endif
        if(display){
            image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]);
            image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
            pred.c = 80;
            image mask = mask_to_rgb(tr);
            image prmask = mask_to_rgb(pred);
            show_image(im, "input", 1);
            show_image(prmask, "pred", 1);
            show_image(mask, "truth", 100);
            free_image(mask);
            free_image(prmask);
        }
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
        free_data(train);
        if(*net->seen/N > epoch){
            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
        }
        if(get_current_batch(net)%100 == 0){
            char buff[256];
            sprintf(buff, "%s/%s.backup",backup_directory,base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);
    free_network(net);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
 }
 void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename)
 {
    network *net = load_network(cfg, weights, 0);
    set_batch_network(net, 1);
    srand(2222222);
    clock_t time;
    char buff[256];
    char *input = buff;
    while(1){
        if(filename){
            strncpy(input, filename, 256);
        }else{
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;
            strtok(input, "\n");
        }
        image im = load_image_color(input, 0, 0);
        image sized = letterbox_image(im, net->w, net->h);
        float *X = sized.data;
        time=clock();
        float *predictions = network_predict(net, X);
        image pred = get_network_image(net);
        image prmask = mask_to_rgb(pred);
        printf("Predicted: %f\n", predictions[0]);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        show_image(sized, "orig", 1);
        show_image(prmask, "pred", 0);
        free_image(im);
        free_image(sized);
        free_image(prmask);
        if (filename) break;
    }
 }
 void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename)
 {
 #ifdef OPENCV
    printf("Classifier Demo\n");
    network *net = load_network(cfg, weights, 0);
    set_batch_network(net, 1);
    srand(2222222);
    CvCapture * cap;
    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }
    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow("Segmenter", CV_WINDOW_NORMAL); 
    cvResizeWindow("Segmenter", 512, 512);
    float fps = 0;
    while(1){
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);
        image in = get_image_from_stream(cap);
        image in_s = letterbox_image(in, net->w, net->h);
        network_predict(net, in_s.data);
        printf("\033[2J");
        printf("\033[1;1H");
        printf("\nFPS:%.0f\n",fps);
        image pred = get_network_image(net);
        image prmask = mask_to_rgb(pred);
        show_image(prmask, "Segmenter", 10);
        free_image(in_s);
        free_image(in);
        free_image(prmask);
        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        float curr = 1000000.f/((long int)tval_result.tv_usec);
        fps = .9*fps + .1*curr;
    }
 #endif
 }
 void run_isegmenter(int argc, char **argv)
 {
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }
    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    int *gpus = 0;
    int gpu = 0;
    int ngpus = 0;
    if(gpu_list){
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        ngpus = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus = calloc(ngpus, sizeof(int));
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            gpu_list = strchr(gpu_list, ',')+1;
        }
    } else {
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int clear = find_arg(argc, argv, "-clear");
    int display = find_arg(argc, argv, "-display");
    char *data = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename);
    else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display);
    else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename);
 }
--- a/examples/lsd.c
+++ b/examples/lsd.c
@ -460,13 +460,9 @@ void inter_dcgan(char *cfgfile, char *weightfile)
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        //char buff[256];
        sprintf(buff, "out%05d", c);
        show_image(out, "out");
        save_image(out, "out");
        save_image(out, buff);
-#ifdef OPENCV
+        show_image(out, "out", 0);
        //cvWaitKey(0);
 #endif
    }
 }
@ -499,11 +495,8 @@ void test_dcgan(char *cfgfile, char *weightfile)
        //yuv_to_rgb(out);
        normalize_image(out);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        show_image(out, "out");
        save_image(out, "out");
-#ifdef OPENCV
+        show_image(out, "out", 0);
        cvWaitKey(0);
 #endif
        free_image(im);
    }
@ -639,11 +632,10 @@ void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, i
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
-            show_image(im, "gen");
+            show_image(im, "gen", 1);
-            show_image(im2, "train");
+            show_image(im2, "train", 1);
            save_image(im, "gen");
            save_image(im2, "train");
            cvWaitKey(1);
        }
 #endif
@ -826,11 +818,10 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
-            show_image(im, "gen");
+            show_image(im, "gen", 1);
-            show_image(im2, "train");
+            show_image(im2, "train", 1);
            save_image(im, "gen");
            save_image(im2, "train");
            cvWaitKey(1);
        }
 #endif
@ -1010,9 +1001,8 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
-            show_image(im, "gen");
+            show_image(im, "gen", 1);
-            show_image(im2, "train");
+            show_image(im2, "train", 1);
            cvWaitKey(1);
        }
 #endif
        free_data(merge);
@ -1342,12 +1332,9 @@ void test_lsd(char *cfg, char *weights, char *filename, int gray)
        //yuv_to_rgb(out);
        constrain_image(out);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        show_image(out, "out");
        show_image(crop, "crop");
        save_image(out, "out");
-#ifdef OPENCV
+        show_image(out, "out", 1);
-        cvWaitKey(0);
+        show_image(crop, "crop", 0);
 #endif
        free_image(im);
        free_image(resized);
--- a/examples/nightmare.c
+++ b/examples/nightmare.c
@ -376,10 +376,7 @@ void run_nightmare(int argc, char **argv)
            if(reconstruct){
                reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
                //if ((n+1)%30 == 0) rate *= .5;
-                show_image(im, "reconstruction");
+                show_image(im, "reconstruction", 10);
 #ifdef OPENCV
                cvWaitKey(10);
 #endif
            }else{
                int layer = max_layer + rand()%range - range/2;
                int octave = rand()%octaves;
@ -400,8 +397,7 @@ void run_nightmare(int argc, char **argv)
        }
        printf("%d %s\n", e, buff);
        save_image(im, buff);
-        //show_image(im, buff);
+        //show_image(im, buff, 0);
        //cvWaitKey(0);
        if(rotate){
            image rot = rotate_image(im, rotate);
--- a/examples/regressor.c
+++ b/examples/regressor.c
@ -179,7 +179,6 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
        image in = get_image_from_stream(cap);
        image crop = center_crop_image(in, net->w, net->h);
        grayscale_image_3c(crop);
        show_image(crop, "Regressor");
        float *predictions = network_predict(net, crop.data);
@ -192,11 +191,10 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
            printf("%s: %f\n", names[i], predictions[i]);
        }
        show_image(crop, "Regressor", 10);
        free_image(in);
        free_image(crop);
        cvWaitKey(10);
        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        float curr = 1000000.f/((long int)tval_result.tv_usec);
--- a/examples/segmenter.c
+++ b/examples/segmenter.c
@ -42,7 +42,6 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;
    clock_t time;
    load_args args = {0};
    args.w = net->w;
@ -73,14 +72,14 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    int epoch = (*net->seen)/N;
    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
-        time=clock();
+        double time = what_time_is_it_now();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);
-        printf("Loaded: %lf seconds\n", sec(clock()-time));
+        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
-        time=clock();
+        time = what_time_is_it_now();
        float loss = 0;
 #ifdef GPU
@ -97,18 +96,15 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
            image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
            image mask = mask_to_rgb(tr);
            image prmask = mask_to_rgb(pred);
-            show_image(im, "input");
+            show_image(im, "input", 1);
-            show_image(prmask, "pred");
+            show_image(prmask, "pred", 1);
-            show_image(mask, "truth");
+            show_image(mask, "truth", 100);
 #ifdef OPENCV
            cvWaitKey(100);
 #endif
            free_image(mask);
            free_image(prmask);
        }
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
-        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
        free_data(train);
        if(*net->seen/N > epoch){
            epoch = *net->seen/N;
@ -159,13 +155,10 @@ void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename)
        float *predictions = network_predict(net, X);
        image pred = get_network_image(net);
        image prmask = mask_to_rgb(pred);
        show_image(sized, "orig");
        show_image(prmask, "pred");
 #ifdef OPENCV
        cvWaitKey(0);
 #endif
        printf("Predicted: %f\n", predictions[0]);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        show_image(sized, "orig", 1);
        show_image(prmask, "pred", 0);
        free_image(im);
        free_image(sized);
        free_image(prmask);
@ -210,14 +203,12 @@ void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, cons
        image pred = get_network_image(net);
        image prmask = mask_to_rgb(pred);
-        show_image(prmask, "Segmenter");
+        show_image(prmask, "Segmenter", 10);
        free_image(in_s);
        free_image(in);
        free_image(prmask);
        cvWaitKey(10);
        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        float curr = 1000000.f/((long int)tval_result.tv_usec);
--- a/examples/super.c
+++ b/examples/super.c
@ -93,7 +93,7 @@ void test_super(char *cfgfile, char *weightfile, char *filename)
        image out = get_network_image(net);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        save_image(out, "out");
-        show_image(out, "out");
+        show_image(out, "out", 0);
        free_image(im);
        if (filename) break;
--- a/examples/yolo.c
+++ b/examples/yolo.c
@ -296,14 +296,10 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
        draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20);
        save_image(im, "predictions");
-        show_image(im, "predictions");
+        show_image(im, "predictions", 0);
        free_detections(dets, nboxes);
        free_image(im);
        free_image(sized);
 #ifdef OPENCV
        cvWaitKey(0);
        cvDestroyAllWindows();
 #endif
        if (filename) break;
    }
 }
--- a/include/darknet.h
+++ b/include/darknet.h
@ -86,6 +86,7 @@ typedef enum {
    XNOR,
    REGION,
    YOLO,
    ISEG,
    REORG,
    UPSAMPLE,
    LOGXENT,
@ -166,6 +167,7 @@ struct layer{
    float ratio;
    float learning_rate_scale;
    float clip;
    int noloss;
    int softmax;
    int classes;
    int coords;
@ -203,6 +205,7 @@ struct layer{
    int dontload;
    int dontsave;
    int dontloadscales;
    int numload;
    float temperature;
    float probability;
@ -213,6 +216,8 @@ struct layer{
    int   * input_layers;
    int   * input_sizes;
    int   * map;
    int   * counts;
    float ** sums;
    float * rand;
    float * cost;
    float * state;
@ -540,7 +545,7 @@ typedef struct{
 } data;
 typedef enum {
-    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA
+    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA
 } data_type;
 typedef struct load_args{
@ -705,7 +710,7 @@ int resize_network(network *net, int w, int h);
 void free_matrix(matrix m);
 void test_resize(char *filename);
 void save_image(image p, const char *name);
-void show_image(image p, const char *name);
+int show_image(image p, const char *name, int ms);
 image copy_image(image p);
 void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
 float get_current_rate(network *net);
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@ -151,7 +151,7 @@ void cudnn_convolutional_setup(layer *l)
            l->convDesc,
            l->dstTensorDesc,
            CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
-            4000000000,
+            2000000000,
            &l->fw_algo);
    cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
            l->weightDesc,
@ -159,7 +159,7 @@ void cudnn_convolutional_setup(layer *l)
            l->convDesc,
            l->dsrcTensorDesc,
            CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
-            4000000000,
+            2000000000,
            &l->bd_algo);
    cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
@ -167,7 +167,7 @@ void cudnn_convolutional_setup(layer *l)
            l->convDesc,
            l->dweightDesc,
            CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
-            4000000000,
+            2000000000,
            &l->bf_algo);
 }
 #endif
--- a/src/data.c
+++ b/src/data.c
@ -361,6 +361,44 @@ box bound_image(image im)
 }
 void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh)
 {
    char labelpath[4096];
    find_replace(path, "images", "mask", labelpath);
    find_replace(labelpath, "JPEGImages", "mask", labelpath);
    find_replace(labelpath, ".jpg", ".txt", labelpath);
    find_replace(labelpath, ".JPG", ".txt", labelpath);
    find_replace(labelpath, ".JPEG", ".txt", labelpath);
    FILE *file = fopen(labelpath, "r");
    if(!file) file_error(labelpath);
    char buff[32788];
    int id;
    int i = 0;
    int j;
    image part = make_image(w, h, 1);
    while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){
        int n = 0;
        int *rle = read_intlist(buff, &n, 0);
        load_rle(part, rle, n);
        image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect);
        if(flip) flip_image(sized);
        image mask = resize_image(sized, mw, mh);
        truth[i*(mw*mh+1)] = id;
        for(j = 0; j < mw*mh; ++j){
            truth[i*(mw*mh + 1) + 1 + j] = mask.data[j];
        }
        ++i;
        free_image(mask);
        free_image(sized);
        free(rle);
    }
    if(i < num_boxes) truth[i*(mw*mh+1)] = -1;
    fclose(file);
    free_image(part);
 }
 void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh)
 {
    char labelpath[4096];
    find_replace(path, "images", "mask", labelpath);
@ -743,7 +781,47 @@ data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int mi
    return d;
 }
-data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure)
+data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int div, int min, int max, float angle, float aspect, float hue, float saturation, float exposure)
 {
    char **random_paths = get_random_paths(paths, n, m);
    int i;
    data d = {0};
    d.shallow = 0;
    d.X.rows = n;
    d.X.vals = calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*3;
    d.y = make_matrix(n, (((w/div)*(h/div))+1)*boxes);
    for(i = 0; i < n; ++i){
        image orig = load_image_color(random_paths[i], 0, 0);
        augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h);
        image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect);
        int flip = rand()%2;
        if(flip) flip_image(sized);
        random_distort_image(sized, hue, saturation, exposure);
        d.X.vals[i] = sized.data;
        //show_image(sized, "image");
        fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, w/div, h/div);
        free_image(orig);
        /*
           image rgb = mask_to_rgb(sized_m, classes);
           show_image(rgb, "part");
           show_image(sized, "orig");
           cvWaitKey(0);
           free_image(rgb);
         */
    }
    free(random_paths);
    return d;
 }
 data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure)
 {
    char **random_paths = get_random_paths(paths, n, m);
    int i;
@ -767,7 +845,7 @@ data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int b
        d.X.vals[i] = sized.data;
        //show_image(sized, "image");
-        fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14);
+        fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14);
        free_image(orig);
@ -975,7 +1053,8 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
        float dh = jitter * orig.h;
        float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh));
-        float scale = rand_uniform(.25, 2);
+        //float scale = rand_uniform(.25, 2);
        float scale = 1;
        float nw, nh;
@ -1025,8 +1104,10 @@ void *load_thread(void *ptr)
        *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
    } else if (a.type == WRITING_DATA){
        *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
    } else if (a.type == ISEG_DATA){
        *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
    } else if (a.type == INSTANCE_DATA){
-        *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
+        *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
    } else if (a.type == SEGMENTATION_DATA){
        *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale);
    } else if (a.type == REGION_DATA){
--- a/src/image.c
+++ b/src/image.c
@ -572,7 +572,7 @@ void show_image_cv(image p, const char *name, IplImage *disp)
 }
 #endif
-void show_image(image p, const char *name)
+int show_image(image p, const char *name, int ms)
 {
 #ifdef OPENCV
    IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
@ -581,9 +581,13 @@ void show_image(image p, const char *name)
    show_image_cv(copy, name, disp);
    free_image(copy);
    cvReleaseImage(&disp);
    int c = cvWaitKey(ms);
    if (c != -1) c = c%256;
    return c;
 #else
    fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
    save_image(p, name);
    return 0;
 #endif
 }
@ -727,7 +731,7 @@ void show_image_layers(image p, char *name)
    for(i = 0; i < p.c; ++i){
        sprintf(buff, "%s - Layer %d", name, i);
        image layer = get_image_layer(p, i);
-        show_image(layer, buff);
+        show_image(layer, buff, 1);
        free_image(layer);
    }
 }
@ -735,7 +739,7 @@ void show_image_layers(image p, char *name)
 void show_image_collapsed(image p, char *name)
 {
    image c = collapse_image_layers(p, 1);
-    show_image(c, name);
+    show_image(c, name, 1);
    free_image(c);
 }
@ -1406,16 +1410,16 @@ void test_resize(char *filename)
    distort_image(c4, .1, .66666, 1.5);
-    show_image(im,   "Original");
+    show_image(im,   "Original", 1);
-    show_image(gray, "Gray");
+    show_image(gray, "Gray", 1);
-    show_image(c1, "C1");
+    show_image(c1, "C1", 1);
-    show_image(c2, "C2");
+    show_image(c2, "C2", 1);
-    show_image(c3, "C3");
+    show_image(c3, "C3", 1);
-    show_image(c4, "C4");
+    show_image(c4, "C4", 1);
 #ifdef OPENCV
    while(1){
        image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320);
-        show_image(aug, "aug");
+        show_image(aug, "aug", 1);
        free_image(aug);
@ -1430,7 +1434,7 @@ void test_resize(char *filename)
        float dhue = rand_uniform(-hue, hue);
        distort_image(c, dhue, dsat, dexp);
-        show_image(c, "rand");
+        show_image(c, "rand", 1);
        printf("%f %f %f\n", dhue, dsat, dexp);
        free_image(c);
        cvWaitKey(0);
@ -1585,7 +1589,7 @@ void show_image_normalized(image im, const char *name)
 {
    image c = copy_image(im);
    normalize_image(c);
-    show_image(c, name);
+    show_image(c, name, 1);
    free_image(c);
 }
@ -1603,7 +1607,7 @@ void show_images(image *ims, int n, char *window)
     */
    normalize_image(m);
    save_image(m, window);
-    show_image(m, window);
+    show_image(m, window, 1);
    free_image(m);
 }
--- a/src/iseg_layer.c
+++ b/src/iseg_layer.c
@ -0,0 +1,219 @@
 #include "iseg_layer.h"
 #include "activations.h"
 #include "blas.h"
 #include "box.h"
 #include "cuda.h"
 #include "utils.h"
 #include <stdio.h>
 #include <assert.h>
 #include <string.h>
 #include <stdlib.h>
 layer make_iseg_layer(int batch, int w, int h, int classes, int ids)
 {
    layer l = {0};
    l.type = ISEG;
    l.h = h;
    l.w = w;
    l.c = classes + ids;
    l.out_w = l.w;
    l.out_h = l.h;
    l.out_c = l.c;
    l.classes = classes;
    l.batch = batch;
    l.extra = ids;
    l.cost = calloc(1, sizeof(float));
    l.outputs = h*w*l.c;
    l.inputs = l.outputs;
    l.truths = 90*(l.w*l.h+1);
    l.delta = calloc(batch*l.outputs, sizeof(float));
    l.output = calloc(batch*l.outputs, sizeof(float));
    l.counts = calloc(90, sizeof(int));
    l.sums = calloc(90, sizeof(float*));
    if(ids){
        int i;
        for(i = 0; i < 90; ++i){
            l.sums[i] = calloc(ids, sizeof(float));
        }
    }
    l.forward = forward_iseg_layer;
    l.backward = backward_iseg_layer;
 #ifdef GPU
    l.forward_gpu = forward_iseg_layer_gpu;
    l.backward_gpu = backward_iseg_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
 #endif
    fprintf(stderr, "iseg\n");
    srand(0);
    return l;
 }
 void resize_iseg_layer(layer *l, int w, int h)
 {
    l->w = w;
    l->h = h;
    l->outputs = h*w*l->c;
    l->inputs = l->outputs;
    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
    l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
 #ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);
    l->delta_gpu =     cuda_make_array(l->delta, l->batch*l->outputs);
    l->output_gpu =    cuda_make_array(l->output, l->batch*l->outputs);
 #endif
 }
 void forward_iseg_layer(const layer l, network net)
 {
    double time = what_time_is_it_now();
    int i,b,j,k;
    int ids = l.extra;
    memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
    memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
 #ifndef GPU
    for (b = 0; b < l.batch; ++b){
        int index = b*l.outputs;
        activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC);
    }
 #endif
    for (b = 0; b < l.batch; ++b){
        // a priori, each pixel has no class
        for(i = 0; i < l.classes; ++i){
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + i*l.w*l.h + k;
                l.delta[index] = 0 - l.output[index];
            }
        }
        // a priori, embedding should be small magnitude
        for(i = 0; i < ids; ++i){
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + (i+l.classes)*l.w*l.h + k;
                l.delta[index] = .1 * (0 - l.output[index]);
            }
        }
        memset(l.counts, 0, 90*sizeof(float));
        for(i = 0; i < 90; ++i){
            l.counts[i] = 0;
            fill_cpu(ids, 0, l.sums[i], 1);
            int c = net.truth[b*l.truths + i*(l.w*l.h+1)];
            if(c < 0) break;
            // add up metric embeddings for each instance
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + c*l.w*l.h + k;
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    l.delta[index] = v - l.output[index];
                    axpy_cpu(ids, 1, l.output + b*l.outputs + l.classes*l.w*l.h + k, l.w*l.h, l.sums[i], 1);
                    ++l.counts[i];
                }
            }
        }
        float *mse = calloc(90, sizeof(float));
        for(i = 0; i < 90; ++i){
            int c = net.truth[b*l.truths + i*(l.w*l.h+1)];
            if(c < 0) break;
            for(k = 0; k < l.w*l.h; ++k){
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    int z;
                    float sum = 0;
                    for(z = 0; z < ids; ++z){
                        int index = b*l.outputs + (l.classes + z)*l.w*l.h + k;
                        sum += pow(l.sums[i][z]/l.counts[i] - l.output[index], 2);
                    }
                    mse[i] += sum;
                }
            }
            mse[i] /= l.counts[i];
        }
        // Calculate average embedding
        for(i = 0; i < 90; ++i){
            if(!l.counts[i]) continue;
            scal_cpu(ids, 1.f/l.counts[i], l.sums[i], 1);
            if(b == 0 && net.gpu_index == 0){
                printf("%4d, %6.3f, ", l.counts[i], mse[i]);
                for(j = 0; j < ids/4; ++j){
                    printf("%6.3f,", l.sums[i][j]);
                }
                printf("\n");
            }
        }
        free(mse);
        // Calculate embedding loss
        for(i = 0; i < 90; ++i){
            if(!l.counts[i]) continue;
            for(k = 0; k < l.w*l.h; ++k){
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    for(j = 0; j < 90; ++j){
                        if(!l.counts[j])continue;
                        int z;
                        for(z = 0; z < ids; ++z){
                            int index = b*l.outputs + (l.classes + z)*l.w*l.h + k;
                            float diff = l.sums[j][z] - l.output[index];
                            if (j == i) l.delta[index] +=   diff < 0? -.1 : .1;
                            else        l.delta[index] += -(diff < 0? -.1 : .1);
                        }
                    }
                }
            }
        }
    }
    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("took %lf sec\n", what_time_is_it_now() - time);
 }
 void backward_iseg_layer(const layer l, network net)
 {
    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1);
 }
 #ifdef GPU
 void forward_iseg_layer_gpu(const layer l, network net)
 {
    copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1);
    int b;
    for (b = 0; b < l.batch; ++b){
        activate_array_gpu(l.output_gpu + b*l.outputs, l.classes*l.w*l.h, LOGISTIC);
        //if(l.extra) activate_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC);
    }
    cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs);
    forward_iseg_layer(l, net);
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
 }
 void backward_iseg_layer_gpu(const layer l, network net)
 {
    int b;
    for (b = 0; b < l.batch; ++b){
        //if(l.extra) gradient_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC, l.delta_gpu + b*l.outputs + l.classes*l.w*l.h);
    }
    axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1);
 }
 #endif
--- a/src/iseg_layer.h
+++ b/src/iseg_layer.h
@ -0,0 +1,19 @@
 #ifndef ISEG_LAYER_H
 #define ISEG_LAYER_H
 #include "darknet.h"
 #include "layer.h"
 #include "network.h"
 layer make_iseg_layer(int batch, int w, int h, int classes, int ids);
 void forward_iseg_layer(const layer l, network net);
 void backward_iseg_layer(const layer l, network net);
 void resize_iseg_layer(layer *l, int w, int h);
 int iseg_num_detections(layer l, float thresh);
 #ifdef GPU
 void forward_iseg_layer_gpu(const layer l, network net);
 void backward_iseg_layer_gpu(layer l, network net);
 #endif
 #endif
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@ -27,8 +27,8 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
    l.w = w;
    l.c = c;
    l.pad = padding;
-    l.out_w = (w + 2*padding - size)/stride + 1;
+    l.out_w = (w + padding - size)/stride + 1;
-    l.out_h = (h + 2*padding - size)/stride + 1;
+    l.out_h = (h + padding - size)/stride + 1;
    l.out_c = c;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = h*w*c;
@ -57,8 +57,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
    l->w = w;
    l->inputs = h*w*l->c;
-    l->out_w = (w + 2*l->pad - l->size)/l->stride + 1;
+    l->out_w = (w + l->pad - l->size)/l->stride + 1;
-    l->out_h = (h + 2*l->pad - l->size)/l->stride + 1;
+    l->out_h = (h + l->pad - l->size)/l->stride + 1;
    l->outputs = l->out_w * l->out_h * l->c;
    int output_size = l->outputs * l->batch;
@ -79,8 +79,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
 void forward_maxpool_layer(const maxpool_layer l, network net)
 {
    int b,i,j,k,m,n;
-    int w_offset = -l.pad;
+    int w_offset = -l.pad/l.stride;
-    int h_offset = -l.pad;
+    int h_offset = -l.pad/l.stride;
    int h = l.out_h;
    int w = l.out_w;
--- a/src/maxpool_layer_kernels.cu
+++ b/src/maxpool_layer_kernels.cu
@ -9,8 +9,8 @@ extern "C" {
 __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
 {
-    int h = (in_h + 2*pad - size)/stride + 1;
+    int h = (in_h + pad - size)/stride + 1;
-    int w = (in_w + 2*pad - size)/stride + 1;
+    int w = (in_w + pad - size)/stride + 1;
    int c = in_c;
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@ -24,8 +24,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
    id /= c;
    int b = id;
-    int w_offset = -pad;
+    int w_offset = -pad/2;
-    int h_offset = -pad;
+    int h_offset = -pad/2;
    int out_index = j + w*(i + h*(k + c*b));
    float max = -INFINITY;
@ -49,8 +49,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
 __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
 {
-    int h = (in_h + 2*pad - size)/stride + 1;
+    int h = (in_h + pad - size)/stride + 1;
-    int w = (in_w + 2*pad - size)/stride + 1;
+    int w = (in_w + pad - size)/stride + 1;
    int c = in_c;
    int area = (size-1)/stride;
@ -66,8 +66,8 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
    id /= in_c;
    int b = id;
-    int w_offset = -pad;
+    int w_offset = -pad/2;
-    int h_offset = -pad;
+    int h_offset = -pad/2;
    float d = 0;
    int l, m;
--- a/src/parser.c
+++ b/src/parser.c
@ -27,6 +27,7 @@
 #include "parser.h"
 #include "region_layer.h"
 #include "yolo_layer.h"
 #include "iseg_layer.h"
 #include "reorg_layer.h"
 #include "rnn_layer.h"
 #include "route_layer.h"
@ -52,6 +53,7 @@ LAYER_TYPE string_to_layer_type(char * type)
    if (strcmp(type, "[detection]")==0) return DETECTION;
    if (strcmp(type, "[region]")==0) return REGION;
    if (strcmp(type, "[yolo]")==0) return YOLO;
    if (strcmp(type, "[iseg]")==0) return ISEG;
    if (strcmp(type, "[local]")==0) return LOCAL;
    if (strcmp(type, "[conv]")==0
            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
@ -265,18 +267,19 @@ layer parse_connected(list *options, size_params params)
    return l;
 }
-softmax_layer parse_softmax(list *options, size_params params)
+layer parse_softmax(list *options, size_params params)
 {
    int groups = option_find_int_quiet(options, "groups",1);
-    softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups);
+    layer l = make_softmax_layer(params.batch, params.inputs, groups);
-    layer.temperature = option_find_float_quiet(options, "temperature", 1);
+    l.temperature = option_find_float_quiet(options, "temperature", 1);
    char *tree_file = option_find_str(options, "tree", 0);
-    if (tree_file) layer.softmax_tree = read_tree(tree_file);
+    if (tree_file) l.softmax_tree = read_tree(tree_file);
-    layer.w = params.w;
+    l.w = params.w;
-    layer.h = params.h;
+    l.h = params.h;
-    layer.c = params.c;
+    l.c = params.c;
-    layer.spatial = option_find_float_quiet(options, "spatial", 0);
+    l.spatial = option_find_float_quiet(options, "spatial", 0);
-    return layer;
+    l.noloss =  option_find_int_quiet(options, "noloss", 0);
    return l;
 }
 int *parse_yolo_mask(char *a, int *num)
@ -338,6 +341,15 @@ layer parse_yolo(list *options, size_params params)
    return l;
 }
 layer parse_iseg(list *options, size_params params)
 {
    int classes = option_find_int(options, "classes", 20);
    int ids = option_find_int(options, "ids", 32);
    layer l = make_iseg_layer(params.batch, params.w, params.h, classes, ids);
    assert(l.outputs == params.inputs);
    return l;
 }
 layer parse_region(list *options, size_params params)
 {
    int coords = option_find_int(options, "coords", 4);
@ -472,7 +484,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
 {
    int stride = option_find_int(options, "stride",1);
    int size = option_find_int(options, "size",stride);
-    int padding = option_find_int_quiet(options, "padding", (size-1)/2);
+    int padding = option_find_int_quiet(options, "padding", size-1);
    int batch,h,w,c;
    h = params.h;
@ -791,6 +803,8 @@ network *parse_network_cfg(char *filename)
            l = parse_region(options, params);
        }else if(lt == YOLO){
            l = parse_yolo(options, params);
        }else if(lt == ISEG){
            l = parse_iseg(options, params);
        }else if(lt == DETECTION){
            l = parse_detection(options, params);
        }else if(lt == SOFTMAX){
@ -829,6 +843,7 @@ network *parse_network_cfg(char *filename)
        l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
        l.dontsave = option_find_int_quiet(options, "dontsave", 0);
        l.dontload = option_find_int_quiet(options, "dontload", 0);
        l.numload = option_find_int_quiet(options, "numload", 0);
        l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
        l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
        l.smooth = option_find_float_quiet(options, "smooth", 0);
@ -1152,7 +1167,8 @@ void load_convolutional_weights(layer l, FILE *fp)
        //load_convolutional_weights_binary(l, fp);
        //return;
    }
-    int num = l.nweights;
+    if(l.numload) l.n = l.numload;
    int num = l.c/l.groups*l.n*l.size*l.size;
    fread(l.biases, sizeof(float), l.n, fp);
    if (l.batch_normalize && (!l.dontloadscales)){
        fread(l.scales, sizeof(float), l.n, fp);
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@ -50,7 +50,7 @@ void forward_softmax_layer(const softmax_layer l, network net)
        softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output);
    }
-    if(net.truth){
+    if(net.truth && !l.noloss){
        softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss);
        l.cost[0] = sum_array(l.loss, l.batch*l.inputs);
    }
@ -88,7 +88,7 @@ void forward_softmax_layer_gpu(const softmax_layer l, network net)
            softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
        }
    }
-    if(net.truth){
+    if(net.truth && !l.noloss){
        softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu);
        if(l.softmax_tree){
            mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0);