it's raining really hard outside :-( :rain: :storm: ☁️

2023-08-10 21:13:14 +03:00 · 2017-10-17 11:41:34 -07:00
parent 532c6e1481
commit cd5d393b46
27 changed files with 1340 additions and 1669 deletions
--- a/4
+++ b/4
@@ -58,10 +58,10 @@ LDFLAGS+= -lcudnn
 endif

 OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o  lstm_layer.o
-EXECOBJA=captcha.o lsd.o super.o voxel.o art.o tag.o cifar.o go.o rnn.o rnn_vid.o compare.o segmenter.o regressor.o classifier.o coco.o dice.o yolo.o detector.o  writing.o nightmare.o swag.o darknet.o 
+EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o attention.o darknet.o
 ifeq ($(GPU), 1) 
 LDFLAGS+= -lstdc++ 
-OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
+OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o
 endif

 EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA))
--- a/cfg/darknet.cfg
+++ b/cfg/darknet.cfg
@@ -1,6 +1,6 @@
 [net]
 # Train
-batch=128
+batch=1
 subdivisions=1
 # Test
 # batch=1
--- a/examples/art.c
+++ b/examples/art.c
@@ -5,11 +5,8 @@
 void demo_art(char *cfgfile, char *weightfile, int cam_index)
 {
 #ifdef OPENCV
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);

    srand(2222222);
    CvCapture * cap;
@@ -26,7 +23,7 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)

    while(1){
        image in = get_image_from_stream(cap);
-        image in_s = resize_image(in, net.w, net.h);
+        image in_s = resize_image(in, net->w, net->h);
        show_image(in, window);

        float *p = network_predict(net, in_s.data);
--- a/examples/captcha.c
+++ b/examples/captcha.c
@@ -30,13 +30,10 @@ void train_captcha(char *cfgfile, char *weightfile)
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, 0);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    int imgs = 1024;
-    int i = *net.seen/imgs;
+    int i = *net->seen/imgs;
    int solved = 1;
    list *plist;
    char **labels = get_labels("/data/captcha/reimgs.labels.list");
@@ -53,8 +50,8 @@ void train_captcha(char *cfgfile, char *weightfile)
    data buffer;

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.paths = paths;
    args.classes = 26;
    args.n = imgs;
@@ -83,7 +80,7 @@ void train_captcha(char *cfgfile, char *weightfile)
        float loss = train_network(net, train);
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
-        printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
+        printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net->seen);
        free_data(train);
        if(i%100==0){
            char buff[256];
@@ -95,11 +92,8 @@ void train_captcha(char *cfgfile, char *weightfile)

 void test_captcha(char *cfgfile, char *weightfile, char *filename)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    srand(2222222);
    int i = 0;
    char **names = get_labels("/data/captcha/reimgs.labels.list");
@@ -116,7 +110,7 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
            if(!input) return;
            strtok(input, "\n");
        }
-        image im = load_image_color(input, net.w, net.h);
+        image im = load_image_color(input, net->w, net->h);
        float *X = im.data;
        float *predictions = network_predict(net, X);
        top_predictions(net, 26, indexes);
@@ -136,21 +130,18 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename)
 void valid_captcha(char *cfgfile, char *weightfile, char *filename)
 {
    char **labels = get_labels("/data/captcha/reimgs.labels.list");
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    list *plist = get_paths("/data/captcha/reimgs.fg.list");
    char **paths = (char **)list_to_array(plist);
    int N = plist->size;
-    int outputs = net.outputs;
+    int outputs = net->outputs;

-    set_batch_network(&net, 1);
+    set_batch_network(net, 1);
    srand(2222222);
    int i, j;
    for(i = 0; i < N; ++i){
        if (i%100 == 0) fprintf(stderr, "%d\n", i);
-        image im = load_image_color(paths[i], net.w, net.h);
+        image im = load_image_color(paths[i], net->w, net->h);
        float *X = im.data;
        float *predictions = network_predict(net, X);
        //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
@@ -185,9 +176,9 @@ void valid_captcha(char *cfgfile, char *weightfile, char *filename)
   if(weightfile){
   load_weights(&net, weightfile);
   }
-   printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+   printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
   int imgs = 1024;
-   int i = net.seen/imgs;
+   int i = net->seen/imgs;
   list *plist = get_paths("/data/captcha/train.auto5");
   char **paths = (char **)list_to_array(plist);
   printf("%d\n", plist->size);
@@ -201,10 +192,10 @@ void valid_captcha(char *cfgfile, char *weightfile, char *filename)
   printf("Loaded: %lf seconds\n", sec(clock()-time));
   time=clock();
   float loss = train_network(net, train);
-   net.seen += imgs;
+   net->seen += imgs;
   if(avg_loss == -1) avg_loss = loss;
   avg_loss = avg_loss*.9 + loss*.1;
-   printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
+   printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen);
   free_data(train);
   if(i%10==0){
   char buff[256];
@@ -251,9 +242,9 @@ network net = parse_network_cfg(cfgfile);
 if(weightfile){
    load_weights(&net, weightfile);
 }
-printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
 int imgs = 1024;
-int i = net.seen/imgs;
+int i = net->seen/imgs;
 list *plist = get_paths("/data/captcha/encode.list");
 char **paths = (char **)list_to_array(plist);
 printf("%d\n", plist->size);
@@ -266,10 +257,10 @@ while(1){
    printf("Loaded: %lf seconds\n", sec(clock()-time));
    time=clock();
    float loss = train_network(net, train);
-    net.seen += imgs;
+    net->seen += imgs;
    if(avg_loss == -1) avg_loss = loss;
    avg_loss = avg_loss*.9 + loss*.1;
-    printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
+    printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen);
    free_matrix(train.X);
    if(i%100==0){
        char buff[256];
--- a/examples/cifar.c
+++ b/examples/cifar.c
@@ -6,28 +6,25 @@ void train_cifar(char *cfgfile, char *weightfile)
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, 0);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);

    char *backup_directory = "/home/pjreddie/backup/";
    int classes = 10;
    int N = 50000;

    char **labels = get_labels("data/cifar/labels.txt");
-    int epoch = (*net.seen)/N;
+    int epoch = (*net->seen)/N;
    data train = load_all_cifar10();
-    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
+    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        clock_t time=clock();

        float loss = train_network_sgd(net, train, 1);
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.95 + loss*.05;
-        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
-        if(*net.seen/N > epoch){
-            epoch = *net.seen/N;
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
+        if(*net->seen/N > epoch){
+            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
@@ -54,18 +51,15 @@ void train_cifar_distill(char *cfgfile, char *weightfile)
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, 0);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);

    char *backup_directory = "/home/pjreddie/backup/";
    int classes = 10;
    int N = 50000;

    char **labels = get_labels("data/cifar/labels.txt");
-    int epoch = (*net.seen)/N;
+    int epoch = (*net->seen)/N;

    data train = load_all_cifar10();
    matrix soft = csv_to_matrix("results/ensemble.csv");
@@ -75,15 +69,15 @@ void train_cifar_distill(char *cfgfile, char *weightfile)
    scale_matrix(train.y, 1. - weight);
    matrix_add_matrix(soft, train.y);

-    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
+    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        clock_t time=clock();

        float loss = train_network_sgd(net, train, 1);
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.95 + loss*.05;
-        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
-        if(*net.seen/N > epoch){
-            epoch = *net.seen/N;
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
+        if(*net->seen/N > epoch){
+            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
@@ -106,11 +100,8 @@ void train_cifar_distill(char *cfgfile, char *weightfile)

 void test_cifar_multi(char *filename, char *weightfile)
 {
-    network net = parse_network_cfg(filename);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
    srand(time(0));

    float avg_acc = 0;
@@ -138,10 +129,7 @@ void test_cifar_multi(char *filename, char *weightfile)

 void test_cifar(char *filename, char *weightfile)
 {
-    network net = parse_network_cfg(filename);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(filename, weightfile, 0);
    srand(time(0));

    clock_t time;
@@ -182,10 +170,7 @@ char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","hors

 void test_cifar_csv(char *filename, char *weightfile)
 {
-    network net = parse_network_cfg(filename);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(filename, weightfile, 0);
    srand(time(0));

    data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin");
@@ -207,12 +192,9 @@ void test_cifar_csv(char *filename, char *weightfile)
    free_data(test);
 }

-void test_cifar_csvtrain(char *filename, char *weightfile)
+void test_cifar_csvtrain(char *cfg, char *weights)
 {
-    network net = parse_network_cfg(filename);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfg, weights, 0);
    srand(time(0));

    data test = load_all_cifar10();
--- a/examples/classifier.c
+++ b/examples/classifier.c
@@ -23,7 +23,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
-    network *nets = calloc(ngpus, sizeof(network));
+    network **nets = calloc(ngpus, sizeof(network*));

    srand(time(0));
    int seed = rand();
@@ -33,14 +33,14 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
        cuda_set_device(gpus[i]);
 #endif
        nets[i] = load_network(cfgfile, weightfile, clear);
-        nets[i].learning_rate *= ngpus;
+        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
-    network net = nets[0];
+    network *net = nets[0];

-    int imgs = net.batch * net.subdivisions * ngpus;
+    int imgs = net->batch * net->subdivisions * ngpus;

-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
@@ -56,19 +56,20 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    double time;

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.threads = 32;
-    args.hierarchy = net.hierarchy;
+    args.hierarchy = net->hierarchy;

-    args.min = net.min_crop;
-    args.max = net.max_crop;
-    args.angle = net.angle;
-    args.aspect = net.aspect;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
-    args.size = net.w;
+    args.min = net->min_ratio*net->w;
+    args.max = net->max_ratio*net->w;
+    printf("%d %d\n", args.min, args.max);
+    args.angle = net->angle;
+    args.aspect = net->aspect;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;
+    args.size = net->w;

    args.paths = paths;
    args.classes = classes;
@@ -83,8 +84,32 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    args.d = &buffer;
    load_thread = load_data(args);

-    int epoch = (*net.seen)/N;
-    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
+    int count = 0;
+    int epoch = (*net->seen)/N;
+    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
+        if(net->random && count++%40 == 0){
+            printf("Resizing\n");
+            int dim = (rand() % 11 + 4) * 32;
+            //if (get_current_batch(net)+200 > net->max_batches) dim = 608;
+            //int dim = (rand() % 4 + 16) * 32;
+            printf("%d\n", dim);
+            args.w = dim;
+            args.h = dim;
+            args.size = dim;
+            args.min = net->min_ratio*dim;
+            args.max = net->max_ratio*dim;
+            printf("%d %d\n", args.min, args.max);
+
+            pthread_join(load_thread, 0);
+            train = buffer;
+            free_data(train);
+            load_thread = load_data(args);
+
+            for(i = 0; i < ngpus; ++i){
+                resize_network(nets[i], dim, dim);
+            }
+            net = nets[0];
+        }
        time = what_time_is_it_now();

        pthread_join(load_thread, 0);
@@ -106,10 +131,10 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
 #endif
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
-        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net.seen);
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
        free_data(train);
-        if(*net.seen/N > epoch){
-            epoch = *net.seen/N;
+        if(*net->seen/N > epoch){
+            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
@@ -132,124 +157,10 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    free(base);
 }

-
-/*
-   void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
-   {
-   srand(time(0));
-   float avg_loss = -1;
-   char *base = basecfg(cfgfile);
-   printf("%s\n", base);
-   network net = parse_network_cfg(cfgfile);
-   if(weightfile){
-   load_weights(&net, weightfile);
-   }
-   if(clear) *net.seen = 0;
-
-   int imgs = net.batch * net.subdivisions;
-
-   printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-   list *options = read_data_cfg(datacfg);
-
-   char *backup_directory = option_find_str(options, "backup", "/backup/");
-   char *label_list = option_find_str(options, "labels", "data/labels.list");
-   char *train_list = option_find_str(options, "train", "data/train.list");
-   int classes = option_find_int(options, "classes", 2);
-
-   char **labels = get_labels(label_list);
-   list *plist = get_paths(train_list);
-   char **paths = (char **)list_to_array(plist);
-   printf("%d\n", plist->size);
-   int N = plist->size;
-   clock_t time;
-
-   load_args args = {0};
-   args.w = net.w;
-   args.h = net.h;
-   args.threads = 8;
-
-   args.min = net.min_crop;
-   args.max = net.max_crop;
-   args.angle = net.angle;
-   args.aspect = net.aspect;
-   args.exposure = net.exposure;
-   args.saturation = net.saturation;
-   args.hue = net.hue;
-   args.size = net.w;
-   args.hierarchy = net.hierarchy;
-
-   args.paths = paths;
-   args.classes = classes;
-   args.n = imgs;
-   args.m = N;
-   args.labels = labels;
-   args.type = CLASSIFICATION_DATA;
-
-   data train;
-   data buffer;
-   pthread_t load_thread;
-   args.d = &buffer;
-   load_thread = load_data(args);
-
-   int epoch = (*net.seen)/N;
-   while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
-   time=clock();
-
-   pthread_join(load_thread, 0);
-   train = buffer;
-   load_thread = load_data(args);
-
-   printf("Loaded: %lf seconds\n", sec(clock()-time));
-   time=clock();
-
-#ifdef OPENCV
-if(0){
-int u;
-for(u = 0; u < imgs; ++u){
-    image im = float_to_image(net.w, net.h, 3, train.X.vals[u]);
-    show_image(im, "loaded");
-    cvWaitKey(0);
-}
-}
-#endif
-
-float loss = train_network(net, train);
-free_data(train);
-
-if(avg_loss == -1) avg_loss = loss;
-avg_loss = avg_loss*.9 + loss*.1;
-printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
-if(*net.seen/N > epoch){
-    epoch = *net.seen/N;
-    char buff[256];
-    sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
-    save_weights(net, buff);
-}
-if(get_current_batch(net)%100 == 0){
-    char buff[256];
-    sprintf(buff, "%s/%s.backup",backup_directory,base);
-    save_weights(net, buff);
-}
-}
-char buff[256];
-sprintf(buff, "%s/%s.weights", backup_directory, base);
-save_weights(net, buff);
-
-free_network(net);
-free_ptrs((void**)labels, classes);
-free_ptrs((void**)paths, plist->size);
-free_list(plist);
-free(base);
-}
-*/
-
 void validate_classifier_crop(char *datacfg, char *filename, char *weightfile)
 {
    int i = 0;
-    network net = parse_network_cfg(filename);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(filename, weightfile, 0);
    srand(time(0));

    list *options = read_data_cfg(datacfg);
@@ -275,8 +186,8 @@ void validate_classifier_crop(char *datacfg, char *filename, char *weightfile)
    data val, buffer;

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;

    args.paths = paths;
    args.classes = classes;
@@ -313,11 +224,8 @@ void validate_classifier_crop(char *datacfg, char *filename, char *weightfile)
 void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
 {
    int i, j;
-    network net = parse_network_cfg(filename);
-    set_batch_network(&net, 1);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
    srand(time(0));

    list *options = read_data_cfg(datacfg);
@@ -347,8 +255,8 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
                break;
            }
        }
-        int w = net.w;
-        int h = net.h;
+        int w = net->w;
+        int h = net->h;
        int shift = 32;
        image im = load_image_color(paths[i], w+shift, h+shift);
        image images[10];
@@ -366,7 +274,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
        float *pred = calloc(classes, sizeof(float));
        for(j = 0; j < 10; ++j){
            float *p = network_predict(net, images[j].data);
-            if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1, 1);
+            if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1, 1);
            axpy_cpu(classes, 1, p, 1, pred, 1);
            free_image(images[j]);
        }
@@ -385,11 +293,8 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
 void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
 {
    int i, j;
-    network net = parse_network_cfg(filename);
-    set_batch_network(&net, 1);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
    srand(time(0));

    list *options = read_data_cfg(datacfg);
@@ -410,7 +315,7 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
    float avg_topk = 0;
    int *indexes = calloc(topk, sizeof(int));

-    int size = net.w;
+    int size = net->w;
    for(i = 0; i < m; ++i){
        int class = -1;
        char *path = paths[i];
@@ -422,12 +327,12 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
        }
        image im = load_image_color(paths[i], 0, 0);
        image resized = resize_min(im, size);
-        resize_network(&net, resized.w, resized.h);
+        resize_network(net, resized.w, resized.h);
        //show_image(im, "orig");
        //show_image(crop, "cropped");
        //cvWaitKey(0);
        float *pred = network_predict(net, resized.data);
-        if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1, 1);
+        if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1);

        free_image(im);
        free_image(resized);
@@ -446,18 +351,15 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
 void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
 {
    int i, j;
-    network net = parse_network_cfg(filename);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
    srand(time(0));

    list *options = read_data_cfg(datacfg);

    char *label_list = option_find_str(options, "labels", "data/labels.list");
    char *leaf_list = option_find_str(options, "leaves", 0);
-    if(leaf_list) change_leaves(net.hierarchy, leaf_list);
+    if(leaf_list) change_leaves(net->hierarchy, leaf_list);
    char *valid_list = option_find_str(options, "valid", "data/train.list");
    int classes = option_find_int(options, "classes", 2);
    int topk = option_find_int(options, "top", 1);
@@ -483,13 +385,13 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
            }
        }
        image im = load_image_color(paths[i], 0, 0);
-        image resized = resize_min(im, net.w);
-        image crop = crop_image(resized, (resized.w - net.w)/2, (resized.h - net.h)/2, net.w, net.h);
+        image resized = resize_min(im, net->w);
+        image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h);
        //show_image(im, "orig");
        //show_image(crop, "cropped");
        //cvWaitKey(0);
        float *pred = network_predict(net, crop.data);
-        if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1, 1);
+        if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1);

        if(resized.data != im.data) free_image(resized);
        free_image(im);
@@ -505,14 +407,11 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
    }
 }

-void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
+void validate_classifier_multi(char *datacfg, char *cfg, char *weights)
 {
    int i, j;
-    network net = parse_network_cfg(filename);
-    set_batch_network(&net, 1);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);
    srand(time(0));

    list *options = read_data_cfg(datacfg);
@@ -524,7 +423,8 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)

    char **labels = get_labels(label_list);
    list *plist = get_paths(valid_list);
-    int scales[] = {224, 288, 320, 352, 384};
+    //int scales[] = {224, 288, 320, 352, 384};
+    int scales[] = {224, 256, 288, 320};
    int nscales = sizeof(scales)/sizeof(scales[0]);

    char **paths = (char **)list_to_array(plist);
@@ -548,9 +448,9 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
        image im = load_image_color(paths[i], 0, 0);
        for(j = 0; j < nscales; ++j){
            image r = resize_min(im, scales[j]);
-            resize_network(&net, r.w, r.h);
+            resize_network(net, r.w, r.h);
            float *p = network_predict(net, r.data);
-            if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1 , 1);
+            if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1);
            axpy_cpu(classes, 1, p, 1, pred, 1);
            flip_image(r);
            p = network_predict(net, r.data);
@@ -571,11 +471,8 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)

 void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    srand(2222222);

    list *options = read_data_cfg(datacfg);
@@ -616,7 +513,7 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena
        time=clock();
        float *predictions = network_predict(net, X);

-        layer l = net.layers[layer_num];
+        layer l = net->layers[layer_num];
        for(i = 0; i < l.c; ++i){
            if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]);
        }
@@ -652,11 +549,8 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena

 void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    srand(2222222);

    list *options = read_data_cfg(datacfg);
@@ -682,19 +576,19 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
            strtok(input, "\n");
        }
        image im = load_image_color(input, 0, 0);
-        image r = letterbox_image(im, net.w, net.h);
-        //resize_network(&net, r.w, r.h);
+        image r = letterbox_image(im, net->w, net->h);
+        //resize_network(net, r.w, r.h);
        //printf("%d %d\n", r.w, r.h);

        float *X = r.data;
        time=clock();
        float *predictions = network_predict(net, X);
-        if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1, 1);
-        top_k(predictions, net.outputs, top, indexes);
+        if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
+        top_k(predictions, net->outputs, top, indexes);
        fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        for(i = 0; i < top; ++i){
            int index = indexes[i];
-            //if(net.hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net.hierarchy->parent[index] >= 0) ? names[net.hierarchy->parent[index]] : "Root");
+            //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root");
            //else printf("%s: %f\n",names[index], predictions[index]);
            printf("%5.2f%%: %s\n", predictions[index]*100, names[index]);
        }
@@ -708,11 +602,8 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
 void label_classifier(char *datacfg, char *filename, char *weightfile)
 {
    int i;
-    network net = parse_network_cfg(filename);
-    set_batch_network(&net, 1);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
    srand(time(0));

    list *options = read_data_cfg(datacfg);
@ -730,8 +621,8 @@ void label_classifier(char *datacfg, char *filename, char *weightfile)

    for(i = 0; i < m; ++i){
        image im = load_image_color(paths[i], 0, 0);
-        image resized = resize_min(im, net.w);
-        image crop = crop_image(resized, (resized.w - net.w)/2, (resized.h - net.h)/2, net.w, net.h);
+        image resized = resize_min(im, net->w);
+        image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h);
        float *pred = network_predict(net, crop.data);

        if(resized.data != im.data) free_image(resized);
@ -747,10 +638,7 @@ void label_classifier(char *datacfg, char *filename, char *weightfile)
 void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer)
 {
    int curr = 0;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    srand(time(0));

    list *options = read_data_cfg(datacfg);
@ -769,18 +657,18 @@ void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_
    data val, buffer;

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.paths = paths;
    args.classes = classes;
-    args.n = net.batch;
+    args.n = net->batch;
    args.m = 0;
    args.labels = 0;
    args.d = &buffer;
    args.type = OLD_CLASSIFICATION_DATA;

    pthread_t load_thread = load_data_in_thread(args);
-    for(curr = net.batch; curr < m; curr += net.batch){
+    for(curr = net->batch; curr < m; curr += net->batch){
        time=clock();

        pthread_join(load_thread, 0);
@ -788,7 +676,7 @@ void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_

        if(curr < m){
            args.paths = paths + curr;
-            if (curr + net.batch > m) args.n = m - curr;
+            if (curr + net->batch > m) args.n = m - curr;
            load_thread = load_data_in_thread(args);
        }
        fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time));
@ -798,11 +686,11 @@ void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_

        int i, j;
        if (target_layer >= 0){
-            //layer l = net.layers[target_layer];
+            //layer l = net->layers[target_layer];
        }

        for(i = 0; i < pred.rows; ++i){
-            printf("%s", paths[curr-net.batch+i]);
+            printf("%s", paths[curr-net->batch+i]);
            for(j = 0; j < pred.cols; ++j){
                printf("\t%g", pred.vals[i][j]);
            }
@ -824,11 +712,8 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
    float roll = .2;

    printf("Classifier Demo\n");
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
@ -862,7 +747,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i

        image in = get_image_from_stream(cap);
        if(!in.data) break;
-        image in_s = resize_image(in, net.w, net.h);
+        image in_s = resize_image(in, net->w, net->h);

        image out = in;
        int x1 = out.w / 20;
@ -956,11 +841,8 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
    int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697};

    printf("Classifier Demo\n");
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
@ -990,7 +872,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
-        image in_s = resize_image(in, net.w, net.h);
+        image in_s = resize_image(in, net->w, net->h);
        show_image(in, "Threat Detection");

        float *predictions = network_predict(net, in_s.data);
@ -1033,11 +915,8 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
 {
 #ifdef OPENCV
    printf("Classifier Demo\n");
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
@ -1067,11 +946,11 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
-        image in_s = resize_image(in, net.w, net.h);
+        image in_s = resize_image(in, net->w, net->h);
        show_image(in, "Classifier");

        float *predictions = network_predict(net, in_s.data);
-        if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1, 1);
+        if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
        top_predictions(net, top, indexes);

        printf("\033[2J");
--- a/examples/coco.c
+++ b/examples/coco.c
@ -17,17 +17,14 @@ void train_coco(char *cfgfile, char *weightfile)
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    int imgs = net.batch*net.subdivisions;
-    int i = *net.seen/imgs;
+    network *net = load_network(cfgfile, weightfile, 0);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    int imgs = net->batch*net->subdivisions;
+    int i = *net->seen/imgs;
    data train, buffer;


-    layer l = net.layers[net.n - 1];
+    layer l = net->layers[net->n - 1];

    int side = l.side;
    int classes = l.classes;
@ -38,8 +35,8 @@ void train_coco(char *cfgfile, char *weightfile)
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
@ -49,15 +46,15 @@ void train_coco(char *cfgfile, char *weightfile)
    args.d = &buffer;
    args.type = REGION_DATA;

-    args.angle = net.angle;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
+    args.angle = net->angle;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    //while(i*imgs < N*120){
-    while(get_current_batch(net) < net.max_batches){
+    while(get_current_batch(net) < net->max_batches){
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
@ -67,7 +64,7 @@ void train_coco(char *cfgfile, char *weightfile)
        printf("Loaded: %lf seconds\n", sec(clock()-time));

        /*
-           image im = float_to_image(net.w, net.h, 3, train.X.vals[113]);
+           image im = float_to_image(net->w, net->h, 3, train.X.vals[113]);
           image copy = copy_image(im);
           draw_coco(copy, train.y.vals[113], 7, "truth");
           cvWaitKey(0);
@ -128,14 +125,11 @@ int get_coco_image_id(char *filename)
    return atoi(p+1);
 }

-void validate_coco(char *cfgfile, char *weightfile)
+void validate_coco(char *cfg, char *weights)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
-    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    char *base = "results/";
@ -144,7 +138,7 @@ void validate_coco(char *cfgfile, char *weightfile)
    //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt");
    char **paths = (char **)list_to_array(plist);

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    int classes = l.classes;
    int side = l.side;

@ -174,8 +168,8 @@ void validate_coco(char *cfgfile, char *weightfile)
    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.type = IMAGE_DATA;

    for(t = 0; t < nthreads; ++t){
@ -221,19 +215,16 @@ void validate_coco(char *cfgfile, char *weightfile)

 void validate_coco_recall(char *cfgfile, char *weightfile)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
-    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    char *base = "results/comp4_det_test_";
    list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt");
    char **paths = (char **)list_to_array(plist);

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    int classes = l.classes;
    int side = l.side;

@ -264,7 +255,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
    for(i = 0; i < m; ++i){
        char *path = paths[i];
        image orig = load_image_color(path, 0, 0);
-        image sized = resize_image(orig, net.w, net.h);
+        image sized = resize_image(orig, net->w, net->h);
        char *id = basecfg(path);
        network_predict(net, sized.data);
        get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1);
@ -309,12 +300,9 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
 void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
 {
    image **alphabet = load_alphabet();
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    layer l = net.layers[net.n-1];
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    layer l = net->layers[net->n-1];
+    set_batch_network(net, 1);
    srand(2222222);
    float nms = .4;
    clock_t time;
@ -335,7 +323,7 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
            strtok(input, "\n");
        }
        image im = load_image_color(input,0,0);
-        image sized = resize_image(im, net.w, net.h);
+        image sized = resize_image(im, net->w, net->h);
        float *X = sized.data;
        time=clock();
        network_predict(net, X);
--- a/examples/darknet.c
+++ b/examples/darknet.c
@ -6,20 +6,15 @@

 extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
 extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen);
-extern void run_voxel(int argc, char **argv);
 extern void run_yolo(int argc, char **argv);
 extern void run_detector(int argc, char **argv);
 extern void run_coco(int argc, char **argv);
-extern void run_writing(int argc, char **argv);
 extern void run_captcha(int argc, char **argv);
 extern void run_nightmare(int argc, char **argv);
-extern void run_dice(int argc, char **argv);
-extern void run_compare(int argc, char **argv);
 extern void run_classifier(int argc, char **argv);
 extern void run_regressor(int argc, char **argv);
 extern void run_segmenter(int argc, char **argv);
 extern void run_char_rnn(int argc, char **argv);
-extern void run_vid_rnn(int argc, char **argv);
 extern void run_tag(int argc, char **argv);
 extern void run_cifar(int argc, char **argv);
 extern void run_go(int argc, char **argv);
@ -32,20 +27,20 @@ void average(int argc, char *argv[])
    char *cfgfile = argv[2];
    char *outfile = argv[3];
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    network sum = parse_network_cfg(cfgfile);
+    network *net = parse_network_cfg(cfgfile);
+    network *sum = parse_network_cfg(cfgfile);

    char *weightfile = argv[4];   
-    load_weights(&sum, weightfile);
+    load_weights(sum, weightfile);

    int i, j;
    int n = argc - 5;
    for(i = 0; i < n; ++i){
        weightfile = argv[i+5];   
-        load_weights(&net, weightfile);
-        for(j = 0; j < net.n; ++j){
-            layer l = net.layers[j];
-            layer out = sum.layers[j];
+        load_weights(net, weightfile);
+        for(j = 0; j < net->n; ++j){
+            layer l = net->layers[j];
+            layer out = sum->layers[j];
            if(l.type == CONVOLUTIONAL){
                int num = l.n*l.c*l.size*l.size;
                axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1);
@ -63,8 +58,8 @@ void average(int argc, char *argv[])
        }
    }
    n = n+1;
-    for(j = 0; j < net.n; ++j){
-        layer l = sum.layers[j];
+    for(j = 0; j < net->n; ++j){
+        layer l = sum->layers[j];
        if(l.type == CONVOLUTIONAL){
            int num = l.n*l.c*l.size*l.size;
            scal_cpu(l.n, 1./n, l.biases, 1);
@ -83,12 +78,12 @@ void average(int argc, char *argv[])
    save_weights(sum, outfile);
 }

-long numops(network net)
+long numops(network *net)
 {
    int i;
    long ops = 0;
-    for(i = 0; i < net.n; ++i){
-        layer l = net.layers[i];
+    for(i = 0; i < net->n; ++i){
+        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w;
        } else if(l.type == CONNECTED){
@ -121,11 +116,11 @@ long numops(network net)
 void speed(char *cfgfile, int tics)
 {
    if (tics == 0) tics = 1000;
-    network net = parse_network_cfg(cfgfile);
-    set_batch_network(&net, 1);
+    network *net = parse_network_cfg(cfgfile);
+    set_batch_network(net, 1);
    int i;
    double time=what_time_is_it_now();
-    image im = make_image(net.w, net.h, net.c*net.batch);
+    image im = make_image(net->w, net->h, net->c*net->batch);
    for(i = 0; i < tics; ++i){
        network_predict(net, im.data);
    }
@ -141,7 +136,7 @@ void speed(char *cfgfile, int tics)
 void operations(char *cfgfile)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
+    network *net = parse_network_cfg(cfgfile);
    long ops = numops(net);
    printf("Floating Point Operations: %ld\n", ops);
    printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
@ -150,63 +145,56 @@ void operations(char *cfgfile)
 void oneoff(char *cfgfile, char *weightfile, char *outfile)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    int oldn = net.layers[net.n - 2].n;
-    int c = net.layers[net.n - 2].c;
-    scal_cpu(oldn*c, .1, net.layers[net.n - 2].weights, 1);
-    scal_cpu(oldn, 0, net.layers[net.n - 2].biases, 1);
-    net.layers[net.n - 2].n = 11921;
-    net.layers[net.n - 2].biases += 5;
-    net.layers[net.n - 2].weights += 5*c;
+    network *net = parse_network_cfg(cfgfile);
+    int oldn = net->layers[net->n - 2].n;
+    int c = net->layers[net->n - 2].c;
+    scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1);
+    scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1);
+    net->layers[net->n - 2].n = 11921;
+    net->layers[net->n - 2].biases += 5;
+    net->layers[net->n - 2].weights += 5*c;
    if(weightfile){
-        load_weights(&net, weightfile);
+        load_weights(net, weightfile);
    }
-    net.layers[net.n - 2].biases -= 5;
-    net.layers[net.n - 2].weights -= 5*c;
-    net.layers[net.n - 2].n = oldn;
+    net->layers[net->n - 2].biases -= 5;
+    net->layers[net->n - 2].weights -= 5*c;
+    net->layers[net->n - 2].n = oldn;
    printf("%d\n", oldn);
-    layer l = net.layers[net.n - 2];
+    layer l = net->layers[net->n - 2];
    copy_cpu(l.n/3, l.biases, 1, l.biases +   l.n/3, 1);
    copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1);
    copy_cpu(l.n/3*l.c, l.weights, 1, l.weights +   l.n/3*l.c, 1);
    copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1);
-    *net.seen = 0;
+    *net->seen = 0;
    save_weights(net, outfile);
 }

 void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
+    network *net = parse_network_cfg(cfgfile);
    if(weightfile){
-        load_weights_upto(&net, weightfile, 0, net.n);
-        load_weights_upto(&net, weightfile, l, net.n);
+        load_weights_upto(net, weightfile, 0, net->n);
+        load_weights_upto(net, weightfile, l, net->n);
    }
-    *net.seen = 0;
-    save_weights_upto(net, outfile, net.n);
+    *net->seen = 0;
+    save_weights_upto(net, outfile, net->n);
 }

 void partial(char *cfgfile, char *weightfile, char *outfile, int max)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights_upto(&net, weightfile, 0, max);
-    }
-    *net.seen = 0;
+    network *net = load_network(cfgfile, weightfile, 1);
    save_weights_upto(net, outfile, max);
 }

 void rescale_net(char *cfgfile, char *weightfile, char *outfile)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    int i;
-    for(i = 0; i < net.n; ++i){
-        layer l = net.layers[i];
+    for(i = 0; i < net->n; ++i){
+        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            rescale_weights(l, 2, -.5);
            break;
@ -218,13 +206,10 @@ void rescale_net(char *cfgfile, char *weightfile, char *outfile)
 void rgbgr_net(char *cfgfile, char *weightfile, char *outfile)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    int i;
-    for(i = 0; i < net.n; ++i){
-        layer l = net.layers[i];
+    for(i = 0; i < net->n; ++i){
+        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            rgbgr_weights(l);
            break;
@ -236,13 +221,10 @@ void rgbgr_net(char *cfgfile, char *weightfile, char *outfile)
 void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    if (weightfile) {
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    int i;
-    for (i = 0; i < net.n; ++i) {
-        layer l = net.layers[i];
+    for (i = 0; i < net->n; ++i) {
+        layer l = net->layers[i];
        if (l.type == CONVOLUTIONAL && l.batch_normalize) {
            denormalize_convolutional_layer(l);
        }
@ -277,18 +259,15 @@ layer normalize_layer(layer l, int n)
 void normalize_net(char *cfgfile, char *weightfile, char *outfile)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    int i;
-    for(i = 0; i < net.n; ++i){
-        layer l = net.layers[i];
+    for(i = 0; i < net->n; ++i){
+        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL && !l.batch_normalize){
-            net.layers[i] = normalize_layer(l, l.n);
+            net->layers[i] = normalize_layer(l, l.n);
        }
        if (l.type == CONNECTED && !l.batch_normalize) {
-            net.layers[i] = normalize_layer(l, l.outputs);
+            net->layers[i] = normalize_layer(l, l.outputs);
        }
        if (l.type == GRU && l.batch_normalize) {
            *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs);
@ -297,7 +276,7 @@ void normalize_net(char *cfgfile, char *weightfile, char *outfile)
            *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs);
            *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs);
            *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs);
-            net.layers[i].batch_normalize=1;
+            net->layers[i].batch_normalize=1;
        }
    }
    save_weights(net, outfile);
@ -306,13 +285,10 @@ void normalize_net(char *cfgfile, char *weightfile, char *outfile)
 void statistics_net(char *cfgfile, char *weightfile)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    if (weightfile) {
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    int i;
-    for (i = 0; i < net.n; ++i) {
-        layer l = net.layers[i];
+    for (i = 0; i < net->n; ++i) {
+        layer l = net->layers[i];
        if (l.type == CONNECTED && l.batch_normalize) {
            printf("Connected Layer %d\n", i);
            statistics_connected_layer(l);
@ -339,20 +315,17 @@ void statistics_net(char *cfgfile, char *weightfile)
 void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
 {
    gpu_index = -1;
-    network net = parse_network_cfg(cfgfile);
-    if (weightfile) {
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    int i;
-    for (i = 0; i < net.n; ++i) {
-        layer l = net.layers[i];
+    for (i = 0; i < net->n; ++i) {
+        layer l = net->layers[i];
        if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) {
            denormalize_convolutional_layer(l);
-            net.layers[i].batch_normalize=0;
+            net->layers[i].batch_normalize=0;
        }
        if (l.type == CONNECTED && l.batch_normalize) {
            denormalize_connected_layer(l);
-            net.layers[i].batch_normalize=0;
+            net->layers[i].batch_normalize=0;
        }
        if (l.type == GRU && l.batch_normalize) {
            denormalize_connected_layer(*l.input_z_layer);
@ -367,7 +340,7 @@ void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
            l.state_z_layer->batch_normalize = 0;
            l.state_r_layer->batch_normalize = 0;
            l.state_h_layer->batch_normalize = 0;
-            net.layers[i].batch_normalize=0;
+            net->layers[i].batch_normalize=0;
        }
    }
    save_weights(net, outfile);
@ -375,9 +348,9 @@ void denormalize_net(char *cfgfile, char *weightfile, char *outfile)

 void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix)
 {
-    network net = load_network(cfgfile, weightfile, 0);
-    image *ims = get_weights(net.layers[0]);
-    int n = net.layers[0].n;
+    network *net = load_network(cfgfile, weightfile, 0);
+    image *ims = get_weights(net->layers[0]);
+    int n = net->layers[0].n;
    int z;
    for(z = 0; z < num; ++z){
        image im = make_image(h, w, 3);
@ -401,10 +374,7 @@ void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix)

 void visualize(char *cfgfile, char *weightfile)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, 0);
    visualize_network(net);
 #ifdef OPENCV
    cvWaitKey(0);
@ -437,8 +407,6 @@ int main(int argc, char **argv)
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
-    } else if (0 == strcmp(argv[1], "voxel")){
-        run_voxel(argc, argv);
    } else if (0 == strcmp(argv[1], "super")){
        run_super(argc, argv);
    } else if (0 == strcmp(argv[1], "lsd")){
@ -457,8 +425,6 @@ int main(int argc, char **argv)
        run_go(argc, argv);
    } else if (0 == strcmp(argv[1], "rnn")){
        run_char_rnn(argc, argv);
-    } else if (0 == strcmp(argv[1], "vid")){
-        run_vid_rnn(argc, argv);
    } else if (0 == strcmp(argv[1], "coco")){
        run_coco(argc, argv);
    } else if (0 == strcmp(argv[1], "classify")){
@ -473,12 +439,6 @@ int main(int argc, char **argv)
        run_art(argc, argv);
    } else if (0 == strcmp(argv[1], "tag")){
        run_tag(argc, argv);
-    } else if (0 == strcmp(argv[1], "compare")){
-        run_compare(argc, argv);
-    } else if (0 == strcmp(argv[1], "dice")){
-        run_dice(argc, argv);
-    } else if (0 == strcmp(argv[1], "writing")){
-        run_writing(argc, argv);
    } else if (0 == strcmp(argv[1], "3d")){
        composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
    } else if (0 == strcmp(argv[1], "test")){
--- a/examples/detector.c
+++ b/examples/detector.c
@ -12,7 +12,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
-    network *nets = calloc(ngpus, sizeof(network));
+    network **nets = calloc(ngpus, sizeof(network *)); /* one network pointer per GPU; each slot is filled by load_network() */

    srand(time(0));
    int seed = rand();
@ -23,16 +23,16 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
        cuda_set_device(gpus[i]);
 #endif
        nets[i] = load_network(cfgfile, weightfile, clear);
-        nets[i].learning_rate *= ngpus;
+        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
-    network net = nets[0];
+    network *net = nets[0];

-    int imgs = net.batch * net.subdivisions * ngpus;
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    int imgs = net->batch * net->subdivisions * ngpus;
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    data train, buffer;

-    layer l = net.layers[net.n - 1];
+    layer l = net->layers[net->n - 1];

    int classes = l.classes;
    float jitter = l.jitter;
@ -58,11 +58,11 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
    double time;
    int count = 0;
    //while(i*imgs < N*120){
-    while(get_current_batch(net) < net.max_batches){
+    while(get_current_batch(net) < net->max_batches){
        if(l.random && count++%10 == 0){
            printf("Resizing\n");
            int dim = (rand() % 10 + 10) * 32;
-            if (get_current_batch(net)+200 > net.max_batches) dim = 608;
+            if (get_current_batch(net)+200 > net->max_batches) dim = 608;
            //int dim = (rand() % 4 + 16) * 32;
            printf("%d\n", dim);
            args.w = dim;
@ -74,7 +74,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
            load_thread = load_data(args);

            for(i = 0; i < ngpus; ++i){
-                resize_network(nets + i, dim, dim);
+                resize_network(nets[i], dim, dim);
            }
            net = nets[0];
        }
@ -94,7 +94,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
        /*
        int zz;
        for(zz = 0; zz < train.X.cols; ++zz){
-            image im = float_to_image(net.w, net.h, 3, train.X.vals[zz]);
+            image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]);
            int k;
            for(k = 0; k < l.max_boxes; ++k){
                box b = float_to_box(train.y.vals[zz] + k*5, 1);
@ -239,18 +239,15 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
    int *map = 0;
    if (mapf) map = read_map(mapf);

-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 2);
-    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 2);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    list *plist = get_paths(valid_images);
    char **paths = (char **)list_to_array(plist);

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    int classes = l.classes;

    char buff[1024];
@ -299,11 +296,11 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
    image *buf_resized = calloc(nthreads, sizeof(image));
    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));

-    image input = make_image(net.w, net.h, net.c*2);
+    image input = make_image(net->w, net->h, net->c*2);

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    //args.type = IMAGE_DATA;
    args.type = LETTERBOX_DATA;

@ -330,14 +327,14 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
            char *path = paths[i+t-nthreads];
            char *id = basecfg(path);
-            copy_cpu(net.w*net.h*net.c, val_resized[t].data, 1, input.data, 1);
+            copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1);
            flip_image(val_resized[t]);
-            copy_cpu(net.w*net.h*net.c, val_resized[t].data, 1, input.data + net.w*net.h*net.c, 1);
+            copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1);

            network_predict(net, input.data);
            int w = val[t].w;
            int h = val[t].h;
-            get_region_boxes(l, w, h, net.w, net.h, thresh, probs, boxes, 0, 0, map, .5, 0);
+            get_region_boxes(l, w, h, net->w, net->h, thresh, probs, boxes, 0, 0, map, .5, 0);
            if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
            if (coco){
                print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@ -375,18 +372,15 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
    int *map = 0;
    if (mapf) map = read_map(mapf);

-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
-    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    list *plist = get_paths(valid_images);
    char **paths = (char **)list_to_array(plist);

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    int classes = l.classes;

    char buff[1024];
@ -436,8 +430,8 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    //args.type = IMAGE_DATA;
    args.type = LETTERBOX_DATA;

@ -468,7 +462,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
-            get_region_boxes(l, w, h, net.w, net.h, thresh, probs, boxes, 0, 0, map, .5, 0);
+            get_region_boxes(l, w, h, net->w, net->h, thresh, probs, boxes, 0, 0, map, .5, 0);
            if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
            if (coco){
                print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@ -495,18 +489,15 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out

 void validate_detector_recall(char *cfgfile, char *weightfile)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
-    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    list *plist = get_paths("data/coco_val_5k.list");
    char **paths = (char **)list_to_array(plist);

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    int classes = l.classes;

    int j, k;
@ -529,10 +520,10 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
    for(i = 0; i < m; ++i){
        char *path = paths[i];
        image orig = load_image_color(path, 0, 0);
-        image sized = resize_image(orig, net.w, net.h);
+        image sized = resize_image(orig, net->w, net->h);
        char *id = basecfg(path);
        network_predict(net, sized.data);
-        get_region_boxes(l, sized.w, sized.h, net.w, net.h, thresh, probs, boxes, 0, 1, 0, .5, 1);
+        get_region_boxes(l, sized.w, sized.h, net->w, net->h, thresh, probs, boxes, 0, 1, 0, .5, 1);
        if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);

        char labelpath[4096];
@ -578,11 +569,8 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
    char **names = get_labels(name_list);

    image **alphabet = load_alphabet();
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    srand(2222222);
    double time;
    char buff[256];
@ -600,12 +588,12 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
            strtok(input, "\n");
        }
        image im = load_image_color(input,0,0);
-        image sized = letterbox_image(im, net.w, net.h);
-        //image sized = resize_image(im, net.w, net.h);
-        //image sized2 = resize_max(im, net.w);
-        //image sized = crop_image(sized2, -((net.w - sized2.w)/2), -((net.h - sized2.h)/2), net.w, net.h);
-        //resize_network(&net, sized.w, sized.h);
-        layer l = net.layers[net.n-1];
+        image sized = letterbox_image(im, net->w, net->h);
+        //image sized = resize_image(im, net->w, net->h);
+        //image sized2 = resize_max(im, net->w);
+        //image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
+        //resize_network(net, sized.w, sized.h);
+        layer l = net->layers[net->n-1];

        box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
        float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
@ -620,7 +608,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
        time=what_time_is_it_now();
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
-        get_region_boxes(l, im.w, im.h, net.w, net.h, thresh, probs, boxes, masks, 0, 0, hier_thresh, 1);
+        get_region_boxes(l, im.w, im.h, net->w, net->h, thresh, probs, boxes, masks, 0, 0, hier_thresh, 1);
        if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        //else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, masks, names, alphabet, l.classes);
--- a/examples/go.c
+++ b/examples/go.c
@ -124,7 +124,7 @@ void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ng
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
-    network *nets = calloc(ngpus, sizeof(network));
+    network **nets = calloc(ngpus, sizeof(network*));

    srand(time(0));
    int seed = rand();
@ -134,10 +134,10 @@ void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ng
        cuda_set_device(gpus[i]);
 #endif
        nets[i] = load_network(cfgfile, weightfile, clear);
-        nets[i].learning_rate *= ngpus;
+        nets[i]->learning_rate *= ngpus;
    }
-    network net = nets[0];
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = nets[0];
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);

    char *backup_directory = "/home/pjreddie/backup/";

@ -147,11 +147,11 @@ void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ng

    int N = m.n;
    printf("Moves: %d\n", N);
-    int epoch = (*net.seen)/N;
-    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
+    int epoch = (*net->seen)/N;
+    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        clock_t time=clock();

-        data train = random_go_moves(m, net.batch*net.subdivisions*ngpus);
+        data train = random_go_moves(m, net->batch*net->subdivisions*ngpus);
        printf("Loaded: %lf seconds\n", sec(clock()-time));
        time=clock();

@ -169,9 +169,9 @@ void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ng

        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.95 + loss*.05;
-        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
-        if(*net.seen/N > epoch){
-            epoch = *net.seen/N;
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
+        if(*net->seen/N > epoch){
+            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory,base, epoch);
            save_weights(net, buff);
@ -281,7 +281,7 @@ void flip_board(float *board)
    }
 }

-void predict_move(network net, float *board, float *move, int multi)
+void predict_move(network *net, float *board, float *move, int multi)
 {
    float *output = network_predict(net, board);
    copy_cpu(19*19+1, output, 1, move, 1);
@ -370,7 +370,7 @@ int legal_go(float *b, char *ko, int p, int r, int c)
    return 1;
 }

-int generate_move(network net, int player, float *board, int multi, float thresh, float temp, char *ko, int print)
+int generate_move(network *net, int player, float *board, int multi, float thresh, float temp, char *ko, int print)
 {
    int i, j;
    int empty = 1;
@ -383,7 +383,7 @@ int generate_move(network net, int player, float *board, int multi, float thresh
    if(empty) {
        return 72;
    }
-    for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
+    for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;

    float move[362];
    if (player < 0) flip_board(board);
@ -439,12 +439,9 @@ void valid_go(char *cfgfile, char *weightfile, int multi, char *filename)
    srand(time(0));
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);

    float *board = calloc(19*19, sizeof(float));
    float *move = calloc(19*19+1, sizeof(float));
@ -486,12 +483,9 @@ int print_game(float *board, FILE *fp)

 void engine_go(char *filename, char *weightfile, int multi)
 {
-    network net = parse_network_cfg(filename);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
    srand(time(0));
-    set_batch_network(&net, 1);
    float *board = calloc(19*19, sizeof(float));
    char *one = calloc(91, sizeof(char));
    char *two = calloc(91, sizeof(char));
@ -679,12 +673,9 @@ void engine_go(char *filename, char *weightfile, int multi)

 void test_go(char *cfg, char *weights, int multi)
 {
-    network net = parse_network_cfg(cfg);
-    if(weights){
-        load_weights(&net, weights);
-    }
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);
    srand(time(0));
-    set_batch_network(&net, 1);
    float *board = calloc(19*19, sizeof(float));
    float *move = calloc(19*19+1, sizeof(float));
    int color = 1;
@ -785,23 +776,24 @@ float score_game(float *board)

 void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
 {
-    network net = parse_network_cfg(filename);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);

-    network net2 = net;
-    if(f2){
+    network *net2;
+    if (f2) {
        net2 = parse_network_cfg(f2);
        if(w2){
-            load_weights(&net2, w2);
+            load_weights(net2, w2);
        }
+    } else {
+        net2 = calloc(1, sizeof(network));
+        *net2 = *net;
    }
    srand(time(0));
    char boards[600][93];
    int count = 0;
-    set_batch_network(&net, 1);
-    set_batch_network(&net2, 1);
+    set_batch_network(net, 1);
+    set_batch_network(net2, 1);
    float *board = calloc(19*19, sizeof(float));
    char *one = calloc(91, sizeof(char));
    char *two = calloc(91, sizeof(char));
@ -837,7 +829,7 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
        }
        print_board(stderr, board, 1, 0);
        //sleep(1);
-        network use = ((total%2==0) == (player==1)) ? net : net2;
+        network *use = ((total%2==0) == (player==1)) ? net : net2;
        int index = generate_move(use, player, board, multi, .4, 1, two, 0);
        if(index < 0){
            done = 1;
--- a/examples/lsd.c
+++ b/examples/lsd.c
@ -16,9 +16,9 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg
    char *gbase = basecfg(gcfg);
    char *abase = basecfg(acfg);

-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet.learning_rate, gnet.momentum, gnet.decay);
-    int imgs = gnet.batch*gnet.subdivisions;
-    int i = *gnet.seen/imgs;
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay);
+    int imgs = gnet->batch*gnet->subdivisions;
+    int i = *gnet->seen/imgs;
    data train, tbuffer;
    data style, sbuffer;

@ -55,27 +55,27 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg
    float aloss_avg = -1;
    float floss_avg = -1;

-    fnet.train=1;
-    int x_size = fnet.inputs*fnet.batch;
-    int y_size = fnet.truths*fnet.batch;
+    fnet->train=1;
+    int x_size = fnet->inputs*fnet->batch;
+    int y_size = fnet->truths*fnet->batch;
    float *X = calloc(x_size, sizeof(float));
    float *y = calloc(y_size, sizeof(float));


-    int ax_size = anet.inputs*anet.batch;
-    int ay_size = anet.truths*anet.batch;
-    fill_gpu(ay_size, .9, anet.truth_gpu, 1);
-    anet.delta_gpu = cuda_make_array(0, ax_size);
-    anet.train = 1;
+    int ax_size = anet->inputs*anet->batch;
+    int ay_size = anet->truths*anet->batch;
+    fill_gpu(ay_size, .9, anet->truth_gpu, 1);
+    anet->delta_gpu = cuda_make_array(0, ax_size);
+    anet->train = 1;

-    int gx_size = gnet.inputs*gnet.batch;
-    int gy_size = gnet.truths*gnet.batch;
+    int gx_size = gnet->inputs*gnet->batch;
+    int gy_size = gnet->truths*gnet->batch;
    gstate.input = cuda_make_array(0, gx_size);
    gstate.truth = 0;
    gstate.delta = 0;
    gstate.train = 1;

-    while (get_current_batch(gnet) < gnet.max_batches) {
+    while (get_current_batch(gnet) < gnet->max_batches) {
        i += 1;
        time=clock();
        pthread_join(tload_thread, 0);
@ -92,20 +92,20 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg

        int j, k;
        float floss = 0;
-        for(j = 0; j < fnet.subdivisions; ++j){
-            layer imlayer = gnet.layers[gnet.n - 1];
-            get_next_batch(train, fnet.batch, j*fnet.batch, X, y);
+        for(j = 0; j < fnet->subdivisions; ++j){
+            layer imlayer = gnet->layers[gnet->n - 1];
+            get_next_batch(train, fnet->batch, j*fnet->batch, X, y);

            cuda_push_array(fstate.input, X, x_size);
            cuda_push_array(gstate.input, X, gx_size);
-            *gnet.seen += gnet.batch;
+            *gnet->seen += gnet->batch;

            forward_network_gpu(fnet, fstate);
-            float *feats = fnet.layers[fnet.n - 2].output_gpu;
+            float *feats = fnet->layers[fnet->n - 2].output_gpu;
            copy_gpu(y_size, feats, 1, fstate.truth, 1);

            forward_network_gpu(gnet, gstate);
-            float *gen = gnet.layers[gnet.n-1].output_gpu;
+            float *gen = gnet->layers[gnet->n-1].output_gpu;
            copy_gpu(x_size, gen, 1, fstate.input, 1);

            fill_gpu(x_size, 0, fstate.delta, 1);
@ -135,11 +135,11 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg

            backward_network_gpu(gnet, gstate);

-            floss += get_network_cost(fnet) /(fnet.subdivisions*fnet.batch);
+            floss += get_network_cost(fnet) /(fnet->subdivisions*fnet->batch);

            cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch);
-            for(k = 0; k < gnet.batch; ++k){
-                int index = j*gnet.batch + k;
+            for(k = 0; k < gnet->batch; ++k){
+                int index = j*gnet->batch + k;
                copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1);
                generated.y.vals[index][0] = .1;
                style.y.vals[index][0] = .9;
@ -148,7 +148,7 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg

 */
 /*
-        image sim = float_to_image(anet.w, anet.h, anet.c, style.X.vals[j]);
+        image sim = float_to_image(anet->w, anet->h, anet->c, style.X.vals[j]);
        show_image(sim, "style");
        cvWaitKey(0);
        */
@ -208,16 +208,16 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear

    int i, j, k;
    layer imlayer = {0};
-    for (i = 0; i < net.n; ++i) {
-        if (net.layers[i].out_c == 3) {
-            imlayer = net.layers[i];
+    for (i = 0; i < net->n; ++i) {
+        if (net->layers[i].out_c == 3) {
+            imlayer = net->layers[i];
            break;
        }
    }

-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    int imgs = net.batch*net.subdivisions;
-    i = *net.seen/imgs;
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    int imgs = net->batch*net->subdivisions;
+    i = *net->seen/imgs;
    data train, buffer;


@ -226,21 +226,21 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;

-    args.min = net.min_crop;
-    args.max = net.max_crop;
-    args.angle = net.angle;
-    args.aspect = net.aspect;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
-    args.size = net.w;
+    args.min = net->min_crop;
+    args.max = net->max_crop;
+    args.angle = net->angle;
+    args.aspect = net->aspect;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;
+    args.size = net->w;
    args.type = CLASSIFICATION_DATA;
    args.classes = 1;
    char *ls[1] = {"coco"};
@ -252,7 +252,7 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear
    network_state gstate = {0};
    gstate.index = 0;
    gstate.net = net;
-    int x_size = get_network_input_size(net)*net.batch;
+    int x_size = get_network_input_size(net)*net->batch;
    int y_size = x_size;
    gstate.input = cuda_make_array(0, x_size);
    gstate.truth = cuda_make_array(0, y_size);
@ -265,7 +265,7 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear
    network_state astate = {0};
    astate.index = 0;
    astate.net = anet;
-    int ay_size = get_network_output_size(anet)*anet.batch;
+    int ay_size = get_network_output_size(anet)*anet->batch;
    astate.input = 0;
    astate.truth = 0;
    astate.delta = 0;
@ -280,7 +280,7 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear

    //data generated = copy_data(train);

-    while (get_current_batch(net) < net.max_batches) {
+    while (get_current_batch(net) < net->max_batches) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
@ -291,31 +291,31 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear

        data gray = copy_data(train);
        for(j = 0; j < imgs; ++j){
-            image gim = float_to_image(net.w, net.h, net.c, gray.X.vals[j]);
+            image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]);
            grayscale_image_3c(gim);
            train.y.vals[j][0] = .9;

-            image yim = float_to_image(net.w, net.h, net.c, train.X.vals[j]);
+            image yim = float_to_image(net->w, net->h, net->c, train.X.vals[j]);
            //rgb_to_yuv(yim);
        }
        time=clock();
        float gloss = 0;

-        for(j = 0; j < net.subdivisions; ++j){
-            get_next_batch(train, net.batch, j*net.batch, pixs, y);
-            get_next_batch(gray, net.batch, j*net.batch, graypixs, y);
+        for(j = 0; j < net->subdivisions; ++j){
+            get_next_batch(train, net->batch, j*net->batch, pixs, y);
+            get_next_batch(gray, net->batch, j*net->batch, graypixs, y);
            cuda_push_array(gstate.input, graypixs, x_size);
            cuda_push_array(gstate.truth, pixs, y_size);
            */
            /*
-            image origi = float_to_image(net.w, net.h, 3, pixs);
-            image grayi = float_to_image(net.w, net.h, 3, graypixs);
+            image origi = float_to_image(net->w, net->h, 3, pixs);
+            image grayi = float_to_image(net->w, net->h, 3, graypixs);
            show_image(grayi, "gray");
            show_image(origi, "orig");
            cvWaitKey(0);
            */
            /*
-            *net.seen += net.batch;
+            *net->seen += net->batch;
            forward_network_gpu(net, gstate);

            fill_gpu(imlayer.outputs, 0, imerror, 1);
@ -325,22 +325,22 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear
            forward_network_gpu(anet, astate);
            backward_network_gpu(anet, astate);

-            scal_gpu(imlayer.outputs, .1, net.layers[net.n-1].delta_gpu, 1);
+            scal_gpu(imlayer.outputs, .1, net->layers[net->n-1].delta_gpu, 1);

            backward_network_gpu(net, gstate);

            scal_gpu(imlayer.outputs, 1000, imerror, 1);

            printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs));
-            printf("features %f\n", cuda_mag_array(net.layers[net.n-1].delta_gpu, imlayer.outputs));
+            printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs));

            axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1);

-            gloss += get_network_cost(net) /(net.subdivisions*net.batch);
+            gloss += get_network_cost(net) /(net->subdivisions*net->batch);

            cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch);
-            for(k = 0; k < net.batch; ++k){
-                int index = j*net.batch + k;
+            for(k = 0; k < net->batch; ++k){
+                int index = j*net->batch + k;
                copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
                gray.y.vals[index][0] = .1;
            }
@ -385,11 +385,8 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear

 void test_dcgan(char *cfgfile, char *weightfile)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    srand(2222222);

    clock_t time;
@ -397,8 +394,8 @@ void test_dcgan(char *cfgfile, char *weightfile)
    char *input = buff;
    int i, imlayer = 0;

-    for (i = 0; i < net.n; ++i) {
-        if (net.layers[i].out_c == 3) {
+    for (i = 0; i < net->n; ++i) {
+        if (net->layers[i].out_c == 3) {
            imlayer = i;
            printf("%d\n", i);
            break;
@ -406,7 +403,7 @@ void test_dcgan(char *cfgfile, char *weightfile)
    }

    while(1){
-        image im = make_image(net.w, net.h, net.c);
+        image im = make_image(net->w, net->h, net->c);
        int i;
        for(i = 0; i < im.w*im.h*im.c; ++i){
            im.data[i] = rand_normal();
@ -449,23 +446,23 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
-    network gnet = load_network(cfg, weight, clear);
-    network anet = load_network(acfg, aweight, clear);
-    //float orig_rate = anet.learning_rate;
+    network *gnet = load_network(cfg, weight, clear);
+    network *anet = load_network(acfg, aweight, clear);
+    //float orig_rate = anet->learning_rate;

    int start = 0;
    int i, j, k;
    layer imlayer = {0};
-    for (i = 0; i < gnet.n; ++i) {
-        if (gnet.layers[i].out_c == 3) {
-            imlayer = gnet.layers[i];
+    for (i = 0; i < gnet->n; ++i) {
+        if (gnet->layers[i].out_c == 3) {
+            imlayer = gnet->layers[i];
            break;
        }
    }

-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet.learning_rate, gnet.momentum, gnet.decay);
-    int imgs = gnet.batch*gnet.subdivisions;
-    i = *gnet.seen/imgs;
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay);
+    int imgs = gnet->batch*gnet->subdivisions;
+    i = *gnet->seen/imgs;
    data train, buffer;


@ -487,20 +484,20 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

-    gnet.train = 1;
-    anet.train = 1;
+    gnet->train = 1;
+    anet->train = 1;

-    int x_size = gnet.inputs*gnet.batch;
-    int y_size = gnet.truths*gnet.batch;
+    int x_size = gnet->inputs*gnet->batch;
+    int y_size = gnet->truths*gnet->batch;
    float *imerror = cuda_make_array(0, y_size);

-    //int ay_size = anet.truths*anet.batch;
+    //int ay_size = anet->truths*anet->batch;

    float aloss_avg = -1;

    //data generated = copy_data(train);

-    while (get_current_batch(gnet) < gnet.max_batches) {
+    while (get_current_batch(gnet) < gnet->max_batches) {
    start += 1;
        i += 1;
        time=clock();
@ -521,41 +518,41 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
        }
        time=clock();

-        for(j = 0; j < gnet.subdivisions; ++j){
-            get_next_batch(train, gnet.batch, j*gnet.batch, gnet.truth, 0);
+        for(j = 0; j < gnet->subdivisions; ++j){
+            get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0);
            int z;
            for(z = 0; z < x_size; ++z){
-                gnet.input[z] = rand_normal();
+                gnet->input[z] = rand_normal();
            }

-            cuda_push_array(gnet.input_gpu, gnet.input, x_size);
-            cuda_push_array(gnet.truth_gpu, gnet.truth, y_size);
-            *gnet.seen += gnet.batch;
+            cuda_push_array(gnet->input_gpu, gnet->input, x_size);
+            cuda_push_array(gnet->truth_gpu, gnet->truth, y_size);
+            *gnet->seen += gnet->batch;
            forward_network_gpu(gnet);

            fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
-            fill_gpu(anet.truths*anet.batch, .95, anet.truth_gpu, 1);
-            copy_gpu(anet.inputs*anet.batch, imlayer.output_gpu, 1, anet.input_gpu, 1);
-            anet.delta_gpu = imerror;
+            fill_gpu(anet->truths*anet->batch, .95, anet->truth_gpu, 1);
+            copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1);
+            anet->delta_gpu = imerror;
            forward_network_gpu(anet);
            backward_network_gpu(anet);

-            float genaloss = *anet.cost / anet.batch;
+            float genaloss = *anet->cost / anet->batch;
            printf("%f\n", genaloss);

            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
-            scal_gpu(imlayer.outputs*imlayer.batch, .00, gnet.layers[gnet.n-1].delta_gpu, 1);
+            scal_gpu(imlayer.outputs*imlayer.batch, .00, gnet->layers[gnet->n-1].delta_gpu, 1);

            printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch));
-            printf("features %f\n", cuda_mag_array(gnet.layers[gnet.n-1].delta_gpu, imlayer.outputs*imlayer.batch));
+            printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch));

-            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet.layers[gnet.n-1].delta_gpu, 1);
+            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1);

            backward_network_gpu(gnet);

-            for(k = 0; k < gnet.batch; ++k){
-                int index = j*gnet.batch + k;
-                copy_cpu(gnet.outputs, gnet.output + k*gnet.outputs, 1, gen.X.vals[index], 1);
+            for(k = 0; k < gnet->batch; ++k){
+                int index = j*gnet->batch + k;
+                copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1);
            }
        }
        harmless_update_network_gpu(anet);
@ -570,8 +567,8 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
        //scale_image(im2, .5);
            #ifdef OPENCV
        if(display){
-            image im = float_to_image(anet.w, anet.h, anet.c, gen.X.vals[0]);
-            image im2 = float_to_image(anet.w, anet.h, anet.c, train.X.vals[0]);
+            image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
+            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
            show_image(im, "gen");
            show_image(im2, "train");
            cvWaitKey(50);
@ -580,9 +577,9 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,

 /*
        if(aloss < .1){
-            anet.learning_rate = 0;
+            anet->learning_rate = 0;
        } else if (aloss > .3){
-            anet.learning_rate = orig_rate;
+            anet->learning_rate = orig_rate;
        }
        */

@ -627,21 +624,21 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
-    network net = load_network(cfg, weight, clear);
-    network anet = load_network(acfg, aweight, clear);
+    network *net = load_network(cfg, weight, clear);
+    network *anet = load_network(acfg, aweight, clear);

    int i, j, k;
    layer imlayer = {0};
-    for (i = 0; i < net.n; ++i) {
-        if (net.layers[i].out_c == 3) {
-            imlayer = net.layers[i];
+    for (i = 0; i < net->n; ++i) {
+        if (net->layers[i].out_c == 3) {
+            imlayer = net->layers[i];
            break;
        }
    }

-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    int imgs = net.batch*net.subdivisions;
-    i = *net.seen/imgs;
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    int imgs = net->batch*net->subdivisions;
+    i = *net->seen/imgs;
    data train, buffer;


@ -663,17 +660,17 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

-    int x_size = net.inputs*net.batch;
+    int x_size = net->inputs*net->batch;
    //int y_size = x_size;
-    net.delta = 0;
-    net.train = 1;
+    net->delta = 0;
+    net->train = 1;
    float *pixs = calloc(x_size, sizeof(float));
    float *graypixs = calloc(x_size, sizeof(float));
    //float *y = calloc(y_size, sizeof(float));

-    //int ay_size = anet.outputs*anet.batch;
-    anet.delta = 0;
-    anet.train = 1;
+    //int ay_size = anet->outputs*anet->batch;
+    anet->delta = 0;
+    anet->train = 1;

    float *imerror = cuda_make_array(0, imlayer.outputs*imlayer.batch);

@ -682,7 +679,7 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle

    //data generated = copy_data(train);

-    while (get_current_batch(net) < net.max_batches) {
+    while (get_current_batch(net) < net->max_batches) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
@ -693,7 +690,7 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle

        data gray = copy_data(train);
        for(j = 0; j < imgs; ++j){
-            image gim = float_to_image(net.w, net.h, net.c, gray.X.vals[j]);
+            image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]);
            grayscale_image_3c(gim);
            train.y.vals[j][0] = .95;
            gray.y.vals[j][0] = .05;
@ -701,44 +698,44 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
        time=clock();
        float gloss = 0;

-        for(j = 0; j < net.subdivisions; ++j){
-            get_next_batch(train, net.batch, j*net.batch, pixs, 0);
-            get_next_batch(gray, net.batch, j*net.batch, graypixs, 0);
-            cuda_push_array(net.input_gpu, graypixs, net.inputs*net.batch);
-            cuda_push_array(net.truth_gpu, pixs, net.truths*net.batch);
+        for(j = 0; j < net->subdivisions; ++j){
+            get_next_batch(train, net->batch, j*net->batch, pixs, 0);
+            get_next_batch(gray, net->batch, j*net->batch, graypixs, 0);
+            cuda_push_array(net->input_gpu, graypixs, net->inputs*net->batch);
+            cuda_push_array(net->truth_gpu, pixs, net->truths*net->batch);
            /*
-               image origi = float_to_image(net.w, net.h, 3, pixs);
-               image grayi = float_to_image(net.w, net.h, 3, graypixs);
+               image origi = float_to_image(net->w, net->h, 3, pixs);
+               image grayi = float_to_image(net->w, net->h, 3, graypixs);
               show_image(grayi, "gray");
               show_image(origi, "orig");
               cvWaitKey(0);
             */
-            *net.seen += net.batch;
+            *net->seen += net->batch;
            forward_network_gpu(net);

            fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
-            copy_gpu(anet.inputs*anet.batch, imlayer.output_gpu, 1, anet.input_gpu, 1);
-            fill_gpu(anet.inputs*anet.batch, .95, anet.truth_gpu, 1);
-            anet.delta_gpu = imerror;
+            copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1);
+            fill_gpu(anet->inputs*anet->batch, .95, anet->truth_gpu, 1);
+            anet->delta_gpu = imerror;
            forward_network_gpu(anet);
            backward_network_gpu(anet);

-            scal_gpu(imlayer.outputs*imlayer.batch, 1./100., net.layers[net.n-1].delta_gpu, 1);
+            scal_gpu(imlayer.outputs*imlayer.batch, 1./100., net->layers[net->n-1].delta_gpu, 1);

            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);

            printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch));
-            printf("features %f\n", cuda_mag_array(net.layers[net.n-1].delta_gpu, imlayer.outputs*imlayer.batch));
+            printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs*imlayer.batch));

-            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net.layers[net.n-1].delta_gpu, 1);
+            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net->layers[net->n-1].delta_gpu, 1);

            backward_network_gpu(net);


-            gloss += *net.cost /(net.subdivisions*net.batch);
+            gloss += *net->cost /(net->subdivisions*net->batch);

-            for(k = 0; k < net.batch; ++k){
-                int index = j*net.batch + k;
+            for(k = 0; k < net->batch; ++k){
+                int index = j*net->batch + k;
                copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
            }
        }
@ -752,8 +749,8 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle

            #ifdef OPENCV
        if(display){
-            image im = float_to_image(anet.w, anet.h, anet.c, gray.X.vals[0]);
-            image im2 = float_to_image(anet.w, anet.h, anet.c, train.X.vals[0]);
+            image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]);
+            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
            show_image(im, "gen");
            show_image(im2, "train");
            cvWaitKey(50);
@ -801,27 +798,27 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi
    if(weightfile){
        load_weights(&net, weightfile);
    }
-    if(clear) *net.seen = 0;
+    if(clear) *net->seen = 0;

    char *abase = basecfg(acfgfile);
    network anet = parse_network_cfg(acfgfile);
    if(aweightfile){
        load_weights(&anet, aweightfile);
    }
-    if(clear) *anet.seen = 0;
+    if(clear) *anet->seen = 0;

    int i, j, k;
    layer imlayer = {0};
-    for (i = 0; i < net.n; ++i) {
-        if (net.layers[i].out_c == 3) {
-            imlayer = net.layers[i];
+    for (i = 0; i < net->n; ++i) {
+        if (net->layers[i].out_c == 3) {
+            imlayer = net->layers[i];
            break;
        }
    }

-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    int imgs = net.batch*net.subdivisions;
-    i = *net.seen/imgs;
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    int imgs = net->batch*net->subdivisions;
+    i = *net->seen/imgs;
    data train, buffer;


@ -830,21 +827,21 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;

-    args.min = net.min_crop;
-    args.max = net.max_crop;
-    args.angle = net.angle;
-    args.aspect = net.aspect;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
-    args.size = net.w;
+    args.min = net->min_crop;
+    args.max = net->max_crop;
+    args.angle = net->angle;
+    args.aspect = net->aspect;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;
+    args.size = net->w;
    args.type = CLASSIFICATION_DATA;
    args.classes = 1;
    char *ls[1] = {"coco"};
@ -856,8 +853,8 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi
    network_state gstate = {0};
    gstate.index = 0;
    gstate.net = net;
-    int x_size = get_network_input_size(net)*net.batch;
-    int y_size = 1*net.batch;
+    int x_size = get_network_input_size(net)*net->batch;
+    int y_size = 1*net->batch;
    gstate.input = cuda_make_array(0, x_size);
    gstate.truth = 0;
    gstate.delta = 0;
@ -868,7 +865,7 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi
    network_state astate = {0};
    astate.index = 0;
    astate.net = anet;
-    int ay_size = get_network_output_size(anet)*anet.batch;
+    int ay_size = get_network_output_size(anet)*anet->batch;
    astate.input = 0;
    astate.truth = 0;
    astate.delta = 0;
@ -883,7 +880,7 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi

    //data generated = copy_data(train);

-    while (get_current_batch(net) < net.max_batches) {
+    while (get_current_batch(net) < net->max_batches) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
@ -896,10 +893,10 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi
        time=clock();
        float gloss = 0;

-        for(j = 0; j < net.subdivisions; ++j){
-            get_next_batch(train, net.batch, j*net.batch, X, y);
+        for(j = 0; j < net->subdivisions; ++j){
+            get_next_batch(train, net->batch, j*net->batch, X, y);
            cuda_push_array(gstate.input, X, x_size);
-            *net.seen += net.batch;
+            *net->seen += net->batch;
            forward_network_gpu(net, gstate);

            fill_gpu(imlayer.outputs, 0, imerror, 1);
@ -917,11 +914,11 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi
            printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs));
            printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs));

-            gloss += get_network_cost(net) /(net.subdivisions*net.batch);
+            gloss += get_network_cost(net) /(net->subdivisions*net->batch);

            cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch);
-            for(k = 0; k < net.batch; ++k){
-                int index = j*net.batch + k;
+            for(k = 0; k < net->batch; ++k){
+                int index = j*net->batch + k;
                copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1);
                generated.y.vals[index][0] = 0;
            }
@ -977,10 +974,10 @@ void train_lsd(char *cfgfile, char *weightfile, int clear)
    if(weightfile){
        load_weights(&net, weightfile);
    }
-    if(clear) *net.seen = 0;
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    int imgs = net.batch*net.subdivisions;
-    int i = *net.seen/imgs;
+    if(clear) *net->seen = 0;
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    int imgs = net->batch*net->subdivisions;
+    int i = *net->seen/imgs;
    data train, buffer;


@ -989,21 +986,21 @@ void train_lsd(char *cfgfile, char *weightfile, int clear)
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;

-    args.min = net.min_crop;
-    args.max = net.max_crop;
-    args.angle = net.angle;
-    args.aspect = net.aspect;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
-    args.size = net.w;
+    args.min = net->min_crop;
+    args.max = net->max_crop;
+    args.angle = net->angle;
+    args.aspect = net->aspect;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;
+    args.size = net->w;
    args.type = CLASSIFICATION_DATA;
    args.classes = 1;
    char *ls[1] = {"coco"};
@ -1012,7 +1009,7 @@ void train_lsd(char *cfgfile, char *weightfile, int clear)
    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    //while(i*imgs < N*120){
-    while(get_current_batch(net) < net.max_batches){
+    while(get_current_batch(net) < net->max_batches){
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
@ -1045,13 +1042,10 @@ void train_lsd(char *cfgfile, char *weightfile, int clear)
 }
 */

-void test_lsd(char *cfgfile, char *weightfile, char *filename, int gray)
+void test_lsd(char *cfg, char *weights, char *filename, int gray)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);
    srand(2222222);

    clock_t time;
@ -1059,8 +1053,8 @@ void test_lsd(char *cfgfile, char *weightfile, char *filename, int gray)
    char *input = buff;
    int i, imlayer = 0;

-    for (i = 0; i < net.n; ++i) {
-        if (net.layers[i].out_c == 3) {
+    for (i = 0; i < net->n; ++i) {
+        if (net->layers[i].out_c == 3) {
            imlayer = i;
            printf("%d\n", i);
            break;
@ -1078,8 +1072,8 @@ void test_lsd(char *cfgfile, char *weightfile, char *filename, int gray)
            strtok(input, "\n");
        }
        image im = load_image_color(input, 0, 0);
-        image resized = resize_min(im, net.w);
-        image crop = crop_image(resized, (resized.w - net.w)/2, (resized.h - net.h)/2, net.w, net.h);
+        image resized = resize_min(im, net->w);
+        image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h);
        if(gray) grayscale_image_3c(crop);

        float *X = crop.data;
--- a/examples/nightmare.c
+++ b/examples/nightmare.c
@ -49,14 +49,14 @@ void optimize_picture(network *net, image orig, int max_layer, float scale, floa
    net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c);
    cuda_push_array(net->input_gpu, im.data, net->inputs);

-    forward_network_gpu(*net);
+    forward_network_gpu(net);
    copy_gpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1);

    cuda_pull_array(last.delta_gpu, last.delta, last.outputs);
    calculate_loss(last.delta, last.delta, last.outputs, thresh);
    cuda_push_array(last.delta_gpu, last.delta, last.outputs);

-    backward_network_gpu(*net);
+    backward_network_gpu(net);

    cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c);
    cuda_free(net->delta_gpu);
@ -64,10 +64,10 @@ void optimize_picture(network *net, image orig, int max_layer, float scale, floa
 #else
    net->input = im.data;
    net->delta = delta.data;
-    forward_network(*net);
+    forward_network(net);
    copy_cpu(last.outputs, last.output, 1, last.delta, 1);
    calculate_loss(last.output, last.delta, last.outputs, thresh);
-    backward_network(*net);
+    backward_network(net);
 #endif

    if(flip) flip_image(delta);
@ -127,7 +127,7 @@ void smooth(image recon, image update, float lambda, int num)
    }
 }

-void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters)
+void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters)
 {
    int iter = 0;
    for (iter = 0; iter < iters; ++iter) {
@ -135,22 +135,22 @@ void reconstruct_picture(network net, float *features, image recon, image update

 #ifdef GPU
        layer l = get_network_output_layer(net);
-        cuda_push_array(net.input_gpu, recon.data, recon.w*recon.h*recon.c);
-        //cuda_push_array(net.truth_gpu, features, net.truths);
-        net.delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c);
+        cuda_push_array(net->input_gpu, recon.data, recon.w*recon.h*recon.c);
+        //cuda_push_array(net->truth_gpu, features, net->truths);
+        net->delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c);

        forward_network_gpu(net);
        cuda_push_array(l.delta_gpu, features, l.outputs);
        axpy_gpu(l.outputs, -1, l.output_gpu, 1, l.delta_gpu, 1);
        backward_network_gpu(net);

-        cuda_pull_array(net.delta_gpu, delta.data, delta.w*delta.h*delta.c);
+        cuda_pull_array(net->delta_gpu, delta.data, delta.w*delta.h*delta.c);

-        cuda_free(net.delta_gpu);
+        cuda_free(net->delta_gpu);
 #else
-        net.input = recon.data;
-        net.delta = delta.data;
-        net.truth = features;
+        net->input = recon.data;
+        net->delta = delta.data;
+        net->truth = features;

        forward_network(net);
        backward_network(net);
@ -206,7 +206,7 @@ void run_lsd(int argc, char **argv)
    float *features = 0;
    image update;
    if (reconstruct){
-        im = letterbox_image(im, net.w, net.h);
+        im = letterbox_image(im, net->w, net->h);

        int zz = 0;
        network_predict(net, im.data);
@ -308,12 +308,12 @@ void run_nightmare(int argc, char **argv)
    int reconstruct = find_arg(argc, argv, "-reconstruct");
    int smooth_size = find_int_arg(argc, argv, "-smooth", 1);

-    network net = parse_network_cfg(cfg);
-    load_weights(&net, weights);
+    network *net = parse_network_cfg(cfg);
+    load_weights(net, weights);
    char *cfgbase = basecfg(cfg);
    char *imbase = basecfg(input);

-    set_batch_network(&net, 1);
+    set_batch_network(net, 1);
    image im = load_image_color(input, 0, 0);
    if(0){
        float scale = 1;
@ -325,19 +325,19 @@ void run_nightmare(int argc, char **argv)
        free_image(im);
        im = resized;
    }
-    //im = letterbox_image(im, net.w, net.h);
+    //im = letterbox_image(im, net->w, net->h);

    float *features = 0;
    image update;
    if (reconstruct){
-        net.n = max_layer;
-        im = letterbox_image(im, net.w, net.h);
+        net->n = max_layer;
+        im = letterbox_image(im, net->w, net->h);
        //resize_network(&net, im.w, im.h);

        network_predict(net, im.data);
-        if(net.layers[net.n-1].type == REGION){
+        if(net->layers[net->n-1].type == REGION){
            printf("region!\n");
-            zero_objectness(net.layers[net.n-1]);
+            zero_objectness(net->layers[net->n-1]);
        }
        image out_im = copy_image(get_network_image(net));
        /*
@ -379,7 +379,7 @@ void run_nightmare(int argc, char **argv)
            }else{
                int layer = max_layer + rand()%range - range/2;
                int octave = rand()%octaves;
-                optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
+                optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
            }
        }
        fprintf(stderr, "done\n");
--- a/examples/regressor.c
+++ b/examples/regressor.c
@ -10,7 +10,7 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
-    network *nets = calloc(ngpus, sizeof(network));
+    network **nets = calloc(ngpus, sizeof(network*));

    srand(time(0));
    int seed = rand();
@ -19,19 +19,15 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
 #ifdef GPU
        cuda_set_device(gpus[i]);
 #endif
-        nets[i] = parse_network_cfg(cfgfile);
-        if(weightfile){
-            load_weights(&nets[i], weightfile);
-        }
-        if(clear) *nets[i].seen = 0;
-        nets[i].learning_rate *= ngpus;
+        nets[i] = load_network(cfgfile, weightfile, clear);
+        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
-    network net = nets[0];
+    network *net = nets[0];

-    int imgs = net.batch * net.subdivisions * ngpus;
+    int imgs = net->batch * net->subdivisions * ngpus;

-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
@ -44,18 +40,18 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    clock_t time;

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.threads = 32;

-    args.min = net.min_crop;
-    args.max = net.max_crop;
-    args.angle = net.angle;
-    args.aspect = net.aspect;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
-    args.size = net.w;
+    args.min = net->min_crop;
+    args.max = net->max_crop;
+    args.angle = net->angle;
+    args.aspect = net->aspect;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;
+    args.size = net->w;

    args.paths = paths;
    args.n = imgs;
@ -68,8 +64,8 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    args.d = &buffer;
    load_thread = load_data(args);

-    int epoch = (*net.seen)/N;
-    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
+    int epoch = (*net->seen)/N;
+    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        time=clock();

        pthread_join(load_thread, 0);
@ -91,10 +87,10 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
 #endif
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
-        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
        free_data(train);
-        if(*net.seen/N > epoch){
-            epoch = *net.seen/N;
+        if(*net->seen/N > epoch){
+            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
@ -117,11 +113,8 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,

 void predict_regressor(char *cfgfile, char *weightfile, char *filename)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    srand(2222222);

    clock_t time;
@ -138,7 +131,7 @@ void predict_regressor(char *cfgfile, char *weightfile, char *filename)
            strtok(input, "\n");
        }
        image im = load_image_color(input, 0, 0);
-        image sized = letterbox_image(im, net.w, net.h);
+        image sized = letterbox_image(im, net->w, net->h);

        float *X = sized.data;
        time=clock();
@ -156,11 +149,8 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
 {
 #ifdef OPENCV
    printf("Regressor Demo\n");
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);

    srand(2222222);
    CvCapture * cap;
@ -181,7 +171,7 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
-        image in_s = letterbox_image(in, net.w, net.h);
+        image in_s = letterbox_image(in, net->w, net->h);
        show_image(in, "Regressor");

        float *predictions = network_predict(net, in_s.data);
--- a/examples/rnn.c
+++ b/examples/rnn.c
@ -171,17 +171,14 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);
    float avg_loss = -1;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
+    network *net = load_network(cfgfile, weightfile, clear);

-    int inputs = net.inputs;
-    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net.learning_rate, net.momentum, net.decay, inputs, net.batch, net.time_steps);
-    int batch = net.batch;
-    int steps = net.time_steps;
-    if(clear) *net.seen = 0;
-    int i = (*net.seen)/net.batch;
+    int inputs = net->inputs;
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net->learning_rate, net->momentum, net->decay, inputs, net->batch, net->time_steps);
+    int batch = net->batch;
+    int steps = net->time_steps;
+    if(clear) *net->seen = 0;
+    int i = (*net->seen)/net->batch;

    int streams = batch/steps;
    size_t *offsets = calloc(streams, sizeof(size_t));
@ -191,7 +188,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
    }

    clock_t time;
-    while(get_current_batch(net) < net.max_batches){
+    while(get_current_batch(net) < net->max_batches){
        i += 1;
        time=clock();
        float_pair p;
@ -201,8 +198,8 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
            p = get_rnn_data(text, offsets, inputs, size, streams, steps);
        }

-        copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1);
-        copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1);
+        copy_cpu(net->inputs*net->batch, p.x, 1, net->input, 1);
+        copy_cpu(net->truths*net->batch, p.y, 1, net->truth, 1);
        float loss = train_network_datum(net) / (batch);
        free(p.x);
        free(p.y);
@ -257,14 +254,11 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    int inputs = net.inputs;
+    network *net = load_network(cfgfile, weightfile, 0);
+    int inputs = net->inputs;

    int i, j;
-    for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
+    for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
    int c = 0;
    int len = strlen(seed);
    float *input = calloc(inputs, sizeof(float));
@ -314,14 +308,11 @@ void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp,
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    int inputs = net.inputs;
+    network *net = load_network(cfgfile, weightfile, 0);
+    int inputs = net->inputs;

    int i, j;
-    for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
+    for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
    int c = 0;
    float *input = calloc(inputs, sizeof(float));
    float *out = 0;
@ -362,14 +353,11 @@ void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int r
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    int inputs = net.inputs;
+    network *net = load_network(cfgfile, weightfile, 0);
+    int inputs = net->inputs;

    int i, j;
-    for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
+    for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
    int c = 0;
    float *input = calloc(inputs, sizeof(float));
    float *out = 0;
@ -400,11 +388,8 @@ void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed)
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    int inputs = net.inputs;
+    network *net = load_network(cfgfile, weightfile, 0);
+    int inputs = net->inputs;

    int count = 0;
    int words = 1;
@ -452,11 +437,8 @@ void valid_char_rnn(char *cfgfile, char *weightfile, char *seed)
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    int inputs = net.inputs;
+    network *net = load_network(cfgfile, weightfile, 0);
+    int inputs = net->inputs;

    int count = 0;
    int words = 1;
@ -493,11 +475,8 @@ void vec_char_rnn(char *cfgfile, char *weightfile, char *seed)
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    int inputs = net.inputs;
+    network *net = load_network(cfgfile, weightfile, 0);
+    int inputs = net->inputs;

    int c;
    int seed_len = strlen(seed);
@ -525,7 +504,7 @@ void vec_char_rnn(char *cfgfile, char *weightfile, char *seed)
        network_predict(net, input);
        input[(int)c] = 0;

-        layer l = net.layers[0];
+        layer l = net->layers[0];
        #ifdef GPU
        cuda_pull_array(l.output_gpu, l.output, l.outputs);
        #endif
--- a/examples/segmenter.c
+++ b/examples/segmenter.c
@ -10,7 +10,7 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
-    network *nets = calloc(ngpus, sizeof(network));
+    network **nets = calloc(ngpus, sizeof(network*));

    srand(time(0));
    int seed = rand();
@ -19,23 +19,20 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
 #ifdef GPU
        cuda_set_device(gpus[i]);
 #endif
-        nets[i] = parse_network_cfg(cfgfile);
-        if(weightfile){
-            load_weights(&nets[i], weightfile);
-        }
-        if(clear) *nets[i].seen = 0;
+        nets[i] = load_network(cfgfile, weightfile, clear);
+        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
-    network net = nets[0];
+    network *net = nets[0];
    image pred = get_network_image(net);

-    int div = net.w/pred.w;
-    assert(pred.w * div == net.w);
-    assert(pred.h * div == net.h);
+    int div = net->w/pred.w;
+    assert(pred.w * div == net->w);
+    assert(pred.h * div == net->h);

-    int imgs = net.batch * net.subdivisions * ngpus;
+    int imgs = net->batch * net->subdivisions * ngpus;

-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
@ -48,19 +45,19 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    clock_t time;

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.threads = 32;
    args.scale = div;

-    args.min = net.min_crop;
-    args.max = net.max_crop;
-    args.angle = net.angle;
-    args.aspect = net.aspect;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
-    args.size = net.w;
+    args.min = net->min_crop;
+    args.max = net->max_crop;
+    args.angle = net->angle;
+    args.aspect = net->aspect;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;
+    args.size = net->w;
    args.classes = 80;

    args.paths = paths;
@ -74,8 +71,8 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    args.d = &buffer;
    load_thread = load_data(args);

-    int epoch = (*net.seen)/N;
-    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
+    int epoch = (*net->seen)/N;
+    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        time=clock();

        pthread_join(load_thread, 0);
@ -96,8 +93,8 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
        loss = train_network(net, train);
 #endif
        if(display){
-            image tr = float_to_image(net.w/div, net.h/div, 80, train.y.vals[net.batch*(net.subdivisions-1)]);
-            image im = float_to_image(net.w, net.h, net.c, train.X.vals[net.batch*(net.subdivisions-1)]);
+            image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]);
+            image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
            image mask = mask_to_rgb(tr);
            image prmask = mask_to_rgb(pred);
            show_image(im, "input");
@ -111,10 +108,10 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
        }
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
-        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
        free_data(train);
-        if(*net.seen/N > epoch){
-            epoch = *net.seen/N;
+        if(*net->seen/N > epoch){
+            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
@ -135,13 +132,10 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    free(base);
 }

-void predict_segmenter(char *datafile, char *cfgfile, char *weightfile, char *filename)
+void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);
    srand(2222222);

    clock_t time;
@ -158,7 +152,7 @@ void predict_segmenter(char *datafile, char *cfgfile, char *weightfile, char *fi
            strtok(input, "\n");
        }
        image im = load_image_color(input, 0, 0);
-        image sized = letterbox_image(im, net.w, net.h);
+        image sized = letterbox_image(im, net->w, net->h);

        float *X = sized.data;
        time=clock();
@ -180,15 +174,12 @@ void predict_segmenter(char *datafile, char *cfgfile, char *weightfile, char *fi
 }


-void demo_segmenter(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
+void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename)
 {
 #ifdef OPENCV
    printf("Classifier Demo\n");
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);

    srand(2222222);
    CvCapture * cap;
@ -209,7 +200,7 @@ void demo_segmenter(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
-        image in_s = letterbox_image(in, net.w, net.h);
+        image in_s = letterbox_image(in, net->w, net->h);

        network_predict(net, in_s.data);

--- a/examples/super.c
+++ b/examples/super.c
@ -8,14 +8,10 @@ void train_super(char *cfgfile, char *weightfile, int clear)
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    if(clear) *net.seen = 0;
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    int imgs = net.batch*net.subdivisions;
-    int i = *net.seen/imgs;
+    network *net = load_network(cfgfile, weightfile, clear);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    int imgs = net->batch*net->subdivisions;
+    int i = *net->seen/imgs;
    data train, buffer;


@ -24,8 +20,8 @@ void train_super(char *cfgfile, char *weightfile, int clear)
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.scale = 4;
    args.paths = paths;
    args.n = imgs;
@ -36,7 +32,7 @@ void train_super(char *cfgfile, char *weightfile, int clear)
    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    //while(i*imgs < N*120){
-    while(get_current_batch(net) < net.max_batches){
+    while(get_current_batch(net) < net->max_batches){
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
@ -70,11 +66,8 @@ void train_super(char *cfgfile, char *weightfile, int clear)

 void test_super(char *cfgfile, char *weightfile, char *filename)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    srand(2222222);

    clock_t time;
@ -91,7 +84,7 @@ void test_super(char *cfgfile, char *weightfile, char *filename)
            strtok(input, "\n");
        }
        image im = load_image_color(input, 0, 0);
-        resize_network(&net, im.w, im.h);
+        resize_network(net, im.w, im.h);
        printf("%d %d\n", im.w, im.h);

        float *X = im.data;
--- a/examples/tag.c
+++ b/examples/tag.c
@ -7,12 +7,8 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
    char *base = basecfg(cfgfile);
    char *backup_directory = "/home/pjreddie/backup/";
    printf("%s\n", base);
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    if(clear) *net.seen = 0;
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfgfile, weightfile, clear);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    int imgs = 1024;
    list *plist = get_paths("/home/pjreddie/tag/train.list");
    char **paths = (char **)list_to_array(plist);
@ -24,30 +20,30 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
    data buffer;

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;

-    args.min = net.w;
-    args.max = net.max_crop;
-    args.size = net.w;
+    args.min = net->w;
+    args.max = net->max_crop;
+    args.size = net->w;

    args.paths = paths;
-    args.classes = net.outputs;
+    args.classes = net->outputs;
    args.n = imgs;
    args.m = N;
    args.d = &buffer;
    args.type = TAG_DATA;

-    args.angle = net.angle;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
+    args.angle = net->angle;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;

-    fprintf(stderr, "%d classes\n", net.outputs);
+    fprintf(stderr, "%d classes\n", net->outputs);

    load_thread = load_data_in_thread(args);
-    int epoch = (*net.seen)/N;
-    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
+    int epoch = (*net->seen)/N;
+    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
@ -58,10 +54,10 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
        float loss = train_network(net, train);
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
-        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
        free_data(train);
-        if(*net.seen/N > epoch){
-            epoch = *net.seen/N;
+        if(*net->seen/N > epoch){
+            epoch = *net->seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
@ -86,11 +82,8 @@ void train_tag(char *cfgfile, char *weightfile, int clear)

 void test_tag(char *cfgfile, char *weightfile, char *filename)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    srand(2222222);
    int i = 0;
    char **names = get_labels("data/tags.txt");
@ -98,7 +91,7 @@ void test_tag(char *cfgfile, char *weightfile, char *filename)
    int indexes[10];
    char buff[256];
    char *input = buff;
-    int size = net.w;
+    int size = net->w;
    while(1){
        if(filename){
            strncpy(input, filename, 256);
@ -111,7 +104,7 @@ void test_tag(char *cfgfile, char *weightfile, char *filename)
        }
        image im = load_image_color(input, 0, 0);
        image r = resize_min(im, size);
-        resize_network(&net, r.w, r.h);
+        resize_network(net, r.w, r.h);
        printf("%d %d\n", r.w, r.h);

        float *X = r.data;
--- a/examples/yolo.c
+++ b/examples/yolo.c
@ -10,17 +10,14 @@ void train_yolo(char *cfgfile, char *weightfile)
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    int imgs = net.batch*net.subdivisions;
-    int i = *net.seen/imgs;
+    network *net = load_network(cfgfile, weightfile, 0);
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    int imgs = net->batch*net->subdivisions;
+    int i = *net->seen/imgs;
    data train, buffer;


-    layer l = net.layers[net.n - 1];
+    layer l = net->layers[net->n - 1];

    int side = l.side;
    int classes = l.classes;
@ -31,8 +28,8 @@ void train_yolo(char *cfgfile, char *weightfile)
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
@ -42,15 +39,15 @@ void train_yolo(char *cfgfile, char *weightfile)
    args.d = &buffer;
    args.type = REGION_DATA;

-    args.angle = net.angle;
-    args.exposure = net.exposure;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
+    args.angle = net->angle;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    //while(i*imgs < N*120){
-    while(get_current_batch(net) < net.max_batches){
+    while(get_current_batch(net) < net->max_batches){
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
@ -98,14 +95,11 @@ void print_yolo_detections(FILE **fps, char *id, box *boxes, float **probs, int
    }
 }

-void validate_yolo(char *cfgfile, char *weightfile)
+void validate_yolo(char *cfg, char *weights)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
-    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    char *base = "results/comp4_det_test_";
@ -114,7 +108,7 @@ void validate_yolo(char *cfgfile, char *weightfile)
    //list *plist = get_paths("data/voc.2012.test");
    char **paths = (char **)list_to_array(plist);

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    int classes = l.classes;

    int j;
@ -144,8 +138,8 @@ void validate_yolo(char *cfgfile, char *weightfile)
    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));

    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
+    args.w = net->w;
+    args.h = net->h;
    args.type = IMAGE_DATA;

    for(t = 0; t < nthreads; ++t){
@ -186,21 +180,18 @@ void validate_yolo(char *cfgfile, char *weightfile)
    fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
 }

-void validate_yolo_recall(char *cfgfile, char *weightfile)
+void validate_yolo_recall(char *cfg, char *weights)
 {
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
-    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    char *base = "results/comp4_det_test_";
    list *plist = get_paths("data/voc.2007.test");
    char **paths = (char **)list_to_array(plist);

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    int classes = l.classes;
    int side = l.side;

@ -230,7 +221,7 @@ void validate_yolo_recall(char *cfgfile, char *weightfile)
    for(i = 0; i < m; ++i){
        char *path = paths[i];
        image orig = load_image_color(path, 0, 0);
-        image sized = resize_image(orig, net.w, net.h);
+        image sized = resize_image(orig, net->w, net->h);
        char *id = basecfg(path);
        network_predict(net, sized.data);
        get_detection_boxes(l, orig.w, orig.h, thresh, probs, boxes, 1);
@ -275,12 +266,9 @@ void validate_yolo_recall(char *cfgfile, char *weightfile)
 void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
 {
    image **alphabet = load_alphabet();
-    network net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    layer l = net.layers[net.n-1];
-    set_batch_network(&net, 1);
+    network *net = load_network(cfgfile, weightfile, 0);
+    layer l = net->layers[net->n-1];
+    set_batch_network(net, 1);
    srand(2222222);
    clock_t time;
    char buff[256];
@ -301,7 +289,7 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
            strtok(input, "\n");
        }
        image im = load_image_color(input,0,0);
-        image sized = resize_image(im, net.w, net.h);
+        image sized = resize_image(im, net->w, net->h);
        float *X = sized.data;
        time=clock();
        network_predict(net, X);
--- a/include/darknet.h
+++ b/include/darknet.h
@ -446,12 +446,15 @@ typedef struct network{
    int h, w, c;
    int max_crop;
    int min_crop;
+    float max_ratio;
+    float min_ratio;
    int center;
    float angle;
    float aspect;
    float exposure;
    float saturation;
    float hue;
+    int random;

    int gpu_index;
    tree *hierarchy;
@ -553,9 +556,8 @@ typedef struct{
 } box_label;


-network load_network(char *cfg, char *weights, int clear);
-network *load_network_p(char *cfg, char *weights, int clear);
-load_args get_base_args(network net);
+network *load_network(char *cfg, char *weights, int clear);
+load_args get_base_args(network *net);

 void free_data(data d);

@ -575,10 +577,11 @@ pthread_t load_data(load_args args);
 list *read_data_cfg(char *filename);
 list *read_cfg(char *filename);
 unsigned char *read_file(char *filename);
+data resize_data(data orig, int w, int h);

-void forward_network(network net);
-void backward_network(network net);
-void update_network(network net);
+void forward_network(network *net);
+void backward_network(network *net);
+void update_network(network *net);


 void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
@ -600,20 +603,20 @@ void cuda_pull_array(float *x_gpu, float *x, size_t n);
 float cuda_mag_array(float *x_gpu, size_t n);
 void cuda_push_array(float *x_gpu, float *x, size_t n);

-void forward_network_gpu(network net);
-void backward_network_gpu(network net);
-void update_network_gpu(network net);
+void forward_network_gpu(network *net);
+void backward_network_gpu(network *net);
+void update_network_gpu(network *net);

-float train_networks(network *nets, int n, data d, int interval);
-void sync_nets(network *nets, int n, int interval);
-void harmless_update_network_gpu(network net);
+float train_networks(network **nets, int n, data d, int interval);
+void sync_nets(network **nets, int n, int interval);
+void harmless_update_network_gpu(network *net);
 #endif
 void save_image_png(image im, const char *name);
 void get_next_batch(data d, int n, int offset, float *X, float *y);
 void grayscale_image_3c(image im);
 void normalize_image(image p);
 void matrix_to_csv(matrix m);
-float train_network_sgd(network net, data d, int n);
+float train_network_sgd(network *net, data d, int n);
 void rgbgr_image(image im);
 data copy_data(data d);
 data concat_data(data d1, data d2);
@ -622,8 +625,8 @@ float matrix_topk_accuracy(matrix truth, matrix guess, int k);
 void matrix_add_matrix(matrix from, matrix to);
 void scale_matrix(matrix m, float scale);
 matrix csv_to_matrix(char *filename);
-float *network_accuracies(network net, data d, int n);
-float train_network_datum(network net);
+float *network_accuracies(network *net, data d, int n);
+float train_network_datum(network *net);
 image make_random_image(int w, int h, int c);

 void denormalize_connected_layer(layer l);
@ -639,17 +642,17 @@ void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box
 char *option_find_str(list *l, char *key, char *def);
 int option_find_int(list *l, char *key, int def);

-network parse_network_cfg(char *filename);
-void save_weights(network net, char *filename);
+network *parse_network_cfg(char *filename);
+void save_weights(network *net, char *filename);
 void load_weights(network *net, char *filename);
-void save_weights_upto(network net, char *filename, int cutoff);
+void save_weights_upto(network *net, char *filename, int cutoff);
 void load_weights_upto(network *net, char *filename, int start, int cutoff);

 void zero_objectness(layer l);
 void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, box *boxes, float **masks, int only_objectness, int *map, float tree_thresh, int relative);
-void free_network(network net);
+void free_network(network *net);
 void set_batch_network(network *net, int b);
-void set_temp_network(network net, float t);
+void set_temp_network(network *net, float t);
 image load_image(char *filename, int w, int h, int c);
 image load_image_color(char *filename, int w, int h);
 image make_image(int w, int h, int c);
@ -657,6 +660,7 @@ image resize_image(image im, int w, int h);
 image letterbox_image(image im, int w, int h);
 image crop_image(image im, int dx, int dy, int w, int h);
 image resize_min(image im, int min);
+image resize_max(image im, int max);
 image threshold_image(image im, float thresh);
 image mask_to_rgb(image mask);
 int resize_network(network *net, int w, int h);
@ -666,25 +670,25 @@ void save_image(image p, const char *name);
 void show_image(image p, const char *name);
 image copy_image(image p);
 void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
-float get_current_rate(network net);
+float get_current_rate(network *net);
 void composite_3d(char *f1, char *f2, char *out, int delta);
 data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
-size_t get_current_batch(network net);
+size_t get_current_batch(network *net);
 void constrain_image(image im);
-image get_network_image_layer(network net, int i);
-layer get_network_output_layer(network net);
-void top_predictions(network net, int n, int *index);
+image get_network_image_layer(network *net, int i);
+layer get_network_output_layer(network *net);
+void top_predictions(network *net, int n, int *index);
 void flip_image(image a);
 image float_to_image(int w, int h, int c, float *data);
 void ghost_image(image source, image dest, int dx, int dy);
-float network_accuracy(network net, data d);
+float network_accuracy(network *net, data d);
 void random_distort_image(image im, float hue, float saturation, float exposure);
 void fill_image(image m, float s);
 image grayscale_image(image im);
 void rotate_image_cw(image im, int times);
 double what_time_is_it_now();
 image rotate_image(image m, float rad);
-void visualize_network(network net);
+void visualize_network(network *net);
 float box_iou(box a, box b);
 void do_nms(box *boxes, float **probs, int total, int classes, float thresh);
 data load_all_cifar10();
@ -692,11 +696,10 @@ box_label *read_boxes(char *filename, int *n);
 box float_to_box(float *f, int stride);
 void draw_detections(image im, int num, float thresh, box *boxes, float **probs, float **masks, char **names, image **alphabet, int classes);

-matrix network_predict_data(network net, data test);
+matrix network_predict_data(network *net, data test);
 image **load_alphabet();
-image get_network_image(network net);
-float *network_predict(network net, float *input);
-float *network_predict_p(network *net, float *input);
+image get_network_image(network *net);
+float *network_predict(network *net, float *input);

 int network_width(network *net);
 int network_height(network *net);
@ -705,8 +708,7 @@ void network_detect(network *net, image im, float thresh, float hier_thresh, flo
 int num_boxes(network *net);
 box *make_boxes(network *net);

-void reset_network_state(network net, int b);
-void reset_network_state(network net, int b);
+void reset_network_state(network *net, int b);

 char **get_labels(char *filename);
 void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh);
@ -720,7 +722,7 @@ image get_image_from_stream(CvCapture *cap);
 #endif
 #endif
 void free_image(image m);
-float train_network(network net, data d);
+float train_network(network *net, data d);
 pthread_t load_data_in_thread(load_args args);
 void load_data_blocking(load_args args);
 list *get_paths(char *filename);
--- a/python/darknet.py
+++ b/python/darknet.py
@ -38,7 +38,7 @@ lib.network_width.restype = c_int
 lib.network_height.argtypes = [c_void_p]
 lib.network_height.restype = c_int

-predict = lib.network_predict_p
+predict = lib.network_predict
 predict.argtypes = [c_void_p, POINTER(c_float)]
 predict.restype = POINTER(c_float)

@ -57,13 +57,13 @@ make_probs = lib.make_probs
 make_probs.argtypes = [c_void_p]
 make_probs.restype = POINTER(POINTER(c_float))

-detect = lib.network_predict_p
+detect = lib.network_detect  # own symbol: lib.network_predict is already bound to `predict`, and ctypes returns the same cached object per attribute
 detect.argtypes = [c_void_p, IMAGE, c_float, c_float, c_float, POINTER(BOX), POINTER(POINTER(c_float))]

 reset_rnn = lib.reset_rnn
 reset_rnn.argtypes = [c_void_p]

-load_net = lib.load_network_p
+load_net = lib.load_network
 load_net.argtypes = [c_char_p, c_char_p, c_int]
 load_net.restype = c_void_p

--- a/src/data.c
+++ b/src/data.c
@ -1172,11 +1172,32 @@ data load_data_regression(char **paths, int n, int m, int min, int max, int size
    return d;
 }

+data resize_data(data orig, int w, int h) /* Returns a new data set: every row of orig.X resized to w x h (3 channels), labels copied from orig.y. */
+{
+    data d = {0};
+    d.shallow = 0;
+    d.w = w;
+    d.h = h;
+    int i;
+    d.X.rows = orig.X.rows;
+    d.X.cols = w*h*3;
+    d.X.vals = calloc(d.X.rows, sizeof(float*)); /* table of row pointers: each slot holds a float*, so size it by pointer, not by float */
+
+    d.y = copy_matrix(orig.y);
+    for(i = 0; i < orig.X.rows; ++i){
+        image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); /* view row i as an orig.w x orig.h, 3-channel image */
+        d.X.vals[i] = resize_image(im, w, h).data; /* the buffer returned by resize_image becomes row i of the result */
+    }
+    return d;
+}
+
 data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center)
 {
    if(m) paths = get_random_paths(paths, n, m);
    data d = {0};
    d.shallow = 0;
+    d.w=size;
+    d.h=size;
    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center);
    d.y = load_labels_paths(paths, n, labels, k, hierarchy);
    if(m) free(paths);
--- a/src/demo.c
+++ b/src/demo.c
@ -19,7 +19,7 @@ static int demo_classes;

 static float **probs;
 static box *boxes;
-static network net;
+static network *net;
 static image buff [3];
 static image buff_letter[3];
 static int buff_index = 0;
@ -43,7 +43,7 @@ void *detect_in_thread(void *ptr)
    running = 1;
    float nms = .4;

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    float *X = buff_letter[(buff_index+2)%3].data;
    float *prediction = network_predict(net, X);

@ -53,7 +53,7 @@ void *detect_in_thread(void *ptr)
    if(l.type == DETECTION){
        get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
    } else if (l.type == REGION){
-        get_region_boxes(l, buff[0].w, buff[0].h, net.w, net.h, demo_thresh, probs, boxes, 0, 0, 0, demo_hier, 1);
+        get_region_boxes(l, buff[0].w, buff[0].h, net->w, net->h, demo_thresh, probs, boxes, 0, 0, 0, demo_hier, 1);
    } else {
        error("Last layer must produce detections\n");
    }
@ -74,7 +74,7 @@ void *detect_in_thread(void *ptr)
 void *fetch_in_thread(void *ptr)
 {
    int status = fill_image_from_stream(cap, buff[buff_index]);
-    letterbox_image_into(buff[buff_index], net.w, net.h, buff_letter[buff_index]);
+    letterbox_image_into(buff[buff_index], net->w, net->h, buff_letter[buff_index]);
    if(status == 0) demo_done = 1;
    return 0;
 }
@ -126,11 +126,8 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
    demo_thresh = thresh;
    demo_hier = hier;
    printf("Demo\n");
-    net = parse_network_cfg(cfgfile);
-    if(weightfile){
-        load_weights(&net, weightfile);
-    }
-    set_batch_network(&net, 1);
+    net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
    pthread_t detect_thread;
    pthread_t fetch_thread;

@ -155,7 +152,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch

    if(!cap) error("Couldn't connect to webcam.\n");

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    demo_detections = l.n*l.w*l.h;
    int j;

@ -169,9 +166,9 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
    buff[0] = get_image_from_stream(cap);
    buff[1] = copy_image(buff[0]);
    buff[2] = copy_image(buff[0]);
-    buff_letter[0] = letterbox_image(buff[0], net.w, net.h);
-    buff_letter[1] = letterbox_image(buff[0], net.w, net.h);
-    buff_letter[2] = letterbox_image(buff[0], net.w, net.h);
+    buff_letter[0] = letterbox_image(buff[0], net->w, net->h);
+    buff_letter[1] = letterbox_image(buff[0], net->w, net->h);
+    buff_letter[2] = letterbox_image(buff[0], net->w, net->h);
    ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c);

    int count = 0;
@ -218,7 +215,7 @@ void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float th
    demo_hier = hier;
    printf("Demo\n");
    net = load_network(cfg1, weight1, 0);
-    set_batch_network(&net, 1);
+    set_batch_network(net, 1);
    pthread_t detect_thread;
    pthread_t fetch_thread;

@ -243,7 +240,7 @@ void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float th

    if(!cap) error("Couldn't connect to webcam.\n");

-    layer l = net.layers[net.n-1];
+    layer l = net->layers[net->n-1];
    demo_detections = l.n*l.w*l.h;
    int j;

@ -257,9 +254,9 @@ void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float th
    buff[0] = get_image_from_stream(cap);
    buff[1] = copy_image(buff[0]);
    buff[2] = copy_image(buff[0]);
-    buff_letter[0] = letterbox_image(buff[0], net.w, net.h);
-    buff_letter[1] = letterbox_image(buff[0], net.w, net.h);
-    buff_letter[2] = letterbox_image(buff[0], net.w, net.h);
+    buff_letter[0] = letterbox_image(buff[0], net->w, net->h);
+    buff_letter[1] = letterbox_image(buff[0], net->w, net->h);
+    buff_letter[2] = letterbox_image(buff[0], net->w, net->h);
    ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c);

    int count = 0;
--- a/src/network.c
+++ b/src/network.c
@ -30,64 +30,46 @@
 #include "parser.h"
 #include "data.h"

-load_args get_base_args(network net)
+load_args get_base_args(network *net)
 {
    load_args args = {0};
-    args.w = net.w;
-    args.h = net.h;
-    args.size = net.w;
+    args.w = net->w;
+    args.h = net->h;
+    args.size = net->w;

-    args.min = net.min_crop;
-    args.max = net.max_crop;
-    args.angle = net.angle;
-    args.aspect = net.aspect;
-    args.exposure = net.exposure;
-    args.center = net.center;
-    args.saturation = net.saturation;
-    args.hue = net.hue;
+    args.min = net->min_crop;
+    args.max = net->max_crop;
+    args.angle = net->angle;
+    args.aspect = net->aspect;
+    args.exposure = net->exposure;
+    args.center = net->center;
+    args.saturation = net->saturation;
+    args.hue = net->hue;
    return args;
 }

-network load_network(char *cfg, char *weights, int clear)
+network *load_network(char *cfg, char *weights, int clear)
 {
-    network net = parse_network_cfg(cfg);
+    network *net = parse_network_cfg(cfg);
    if(weights && weights[0] != 0){
-        load_weights(&net, weights);
+        load_weights(net, weights);
    }
-    if(clear) *net.seen = 0;
+    if(clear) (*net->seen) = 0;
    return net;
 }

-network *load_network_p(char *cfg, char *weights, int clear)
+size_t get_current_batch(network *net)
 {
-    network *net = calloc(1, sizeof(network));
-    *net = load_network(cfg, weights, clear);
-    return net;
-}
-
-size_t get_current_batch(network net)
-{
-    size_t batch_num = (*net.seen)/(net.batch*net.subdivisions);
+    size_t batch_num = (*net->seen)/(net->batch*net->subdivisions);
    return batch_num;
 }

-void reset_momentum(network net)
-{
-    if (net.momentum == 0) return;
-    net.learning_rate = 0;
-    net.momentum = 0;
-    net.decay = 0;
-    #ifdef GPU
-        //if(net.gpu_index >= 0) update_network_gpu(net);
-    #endif
-}
-
-void reset_network_state(network net, int b)
+void reset_network_state(network *net, int b)
 {
    int i;
-    for (i = 0; i < net.n; ++i) {
+    for (i = 0; i < net->n; ++i) {
        #ifdef GPU
-        layer l = net.layers[i];
+        layer l = net->layers[i];
        if(l.state_gpu){
            fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1);
        }
@ -100,39 +82,38 @@ void reset_network_state(network net, int b)

 void reset_rnn(network *net)
 {
-    reset_network_state(*net, 0);
+    reset_network_state(net, 0);
 }

-float get_current_rate(network net)
+float get_current_rate(network *net)
 {
    size_t batch_num = get_current_batch(net);
    int i;
    float rate;
-    if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power);
-    switch (net.policy) {
+    if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power);
+    switch (net->policy) {
        case CONSTANT:
-            return net.learning_rate;
+            return net->learning_rate;
        case STEP:
-            return net.learning_rate * pow(net.scale, batch_num/net.step);
+            return net->learning_rate * pow(net->scale, batch_num/net->step);
        case STEPS:
-            rate = net.learning_rate;
-            for(i = 0; i < net.num_steps; ++i){
-                if(net.steps[i] > batch_num) return rate;
-                rate *= net.scales[i];
-                //if(net.steps[i] > batch_num - 1 && net.scales[i] > 1) reset_momentum(net);
+            rate = net->learning_rate;
+            for(i = 0; i < net->num_steps; ++i){
+                if(net->steps[i] > batch_num) return rate;
+                rate *= net->scales[i];
            }
            return rate;
        case EXP:
-            return net.learning_rate * pow(net.gamma, batch_num);
+            return net->learning_rate * pow(net->gamma, batch_num);
        case POLY:
-            return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power);
+            return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power);
        case RANDOM:
-            return net.learning_rate * pow(rand_uniform(0,1), net.power);
+            return net->learning_rate * pow(rand_uniform(0,1), net->power);
        case SIG:
-            return net.learning_rate * (1./(1.+exp(net.gamma*(batch_num - net.step))));
+            return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step))));
        default:
            fprintf(stderr, "Policy is weird!\n");
-            return net.learning_rate;
+            return net->learning_rate;
    }
 }

@ -189,19 +170,26 @@ char *get_layer_string(LAYER_TYPE a)
    return "none";
 }

-network make_network(int n)
+/*
+ * Allocate a zero-initialised network with room for n layers.
+ *
+ * Besides the struct itself this allocates the layer array (n entries) and
+ * one-element buffers for the seen, t and cost counters.
+ *
+ * Returns the new network.  An allocation failure is reported through
+ * error() instead of being dereferenced or handed back to the caller.
+ * NOTE(review): assumes error() does not return -- confirm in utils.
+ */
+network *make_network(int n)
 {
-    network net = {0};
-    net.n = n;
-    net.layers = calloc(net.n, sizeof(layer));
-    net.seen = calloc(1, sizeof(size_t));
-    net.t    = calloc(1, sizeof(int));
-    net.cost = calloc(1, sizeof(float));
+    network *net = calloc(1, sizeof(network));
+    if(!net) error("calloc failed");
+    net->n = n;
+    net->layers = calloc(net->n, sizeof(layer));
+    net->seen = calloc(1, sizeof(size_t));
+    net->t    = calloc(1, sizeof(int));
+    net->cost = calloc(1, sizeof(float));
+    /* calloc(0, ...) may legitimately return NULL, so a missing layer
+     * array only counts as a failure when layers were requested. */
+    if((n != 0 && !net->layers) || !net->seen || !net->t || !net->cost){
+        error("calloc failed");
+    }
     return net;
 }

-void forward_network(network net)
+void forward_network(network *netp)
 {
+#ifdef GPU
+    if(netp->gpu_index >= 0){
+        forward_network_gpu(netp);   
+        return;
+    }
+#endif
+    network net = *netp;
    int i;
    for(i = 0; i < net.n; ++i){
        net.index = i;
@ -215,15 +203,22 @@ void forward_network(network net)
            net.truth = l.output;
        }
    }
-    calc_network_cost(net);
+    calc_network_cost(netp);
 }

-void update_network(network net)
+void update_network(network *netp)
 {
+#ifdef GPU
+    if(netp->gpu_index >= 0){
+        update_network_gpu(netp);   
+        return;
+    }
+#endif
+    network net = *netp;
    int i;
    update_args a = {0};
    a.batch = net.batch*net.subdivisions;
-    a.learning_rate = get_current_rate(net);
+    a.learning_rate = get_current_rate(netp);
    a.momentum = net.momentum;
    a.decay = net.decay;
    a.adam = net.adam;
@ -241,8 +236,9 @@ void update_network(network net)
    }
 }

-void calc_network_cost(network net)
+void calc_network_cost(network *netp)
 {
+    network net = *netp;
    int i;
    float sum = 0;
    int count = 0;
@ -255,13 +251,20 @@ void calc_network_cost(network net)
    *net.cost = sum/count;
 }

-int get_predicted_class_network(network net)
+/* Return max_index() applied to the network's output buffer and its length. */
+int get_predicted_class_network(network *net)
 {
-    return max_index(net.output, net.outputs);
+    float *scores = net->output;
+    return max_index(scores, net->outputs);
 }

-void backward_network(network net)
+void backward_network(network *netp)
 {
+#ifdef GPU
+    if(netp->gpu_index >= 0){
+        backward_network_gpu(netp);   
+        return;
+    }
+#endif
+    network net = *netp;
    int i;
    network orig = net;
    for(i = net.n-1; i >= 0; --i){
@ -279,55 +282,52 @@ void backward_network(network net)
    }
 }

-float train_network_datum(network net)
+/*
+ * Train on the batch currently held by the network: flag training mode,
+ * advance *net->seen by net->batch, then run forward_network() and
+ * backward_network().  update_network() is called only when the number of
+ * batches seen so far is a multiple of net->subdivisions.
+ * Returns *net->cost as read right after the backward pass.
+ */
+float train_network_datum(network *net)
 {
-#ifdef GPU
-    if(gpu_index >= 0) return train_network_datum_gpu(net);
-#endif
-    *net.seen += net.batch;
-    net.train = 1;
+    net->train = 1;
+    *net->seen += net->batch;
     forward_network(net);
     backward_network(net);
-    float error = *net.cost;
-    if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net);
-    return error;
+    float loss = *net->cost;
+    if(((*net->seen)/net->batch)%net->subdivisions != 0) return loss;
+    update_network(net);
+    return loss;
 }

-float train_network_sgd(network net, data d, int n)
+/*
+ * Run n training steps; before each one get_random_batch() fills the
+ * network's input and truth buffers from d.
+ * Returns the summed per-step error divided by n*batch.
+ */
+float train_network_sgd(network *net, data d, int n)
 {
-    int batch = net.batch;
+    int batch = net->batch;

     int i;
     float sum = 0;
     for(i = 0; i < n; ++i){
-        get_random_batch(d, batch, net.input, net.truth);
-        float err = train_network_datum(net);
-        sum += err;
+        get_random_batch(d, batch, net->input, net->truth);
+        sum += train_network_datum(net);
     }
     return (float)sum/(n*batch);
 }

-float train_network(network net, data d)
+/*
+ * Train once over d in consecutive batches: step i loads rows starting at
+ * offset i*batch via get_next_batch() and runs train_network_datum().
+ * Asserts that d.X.rows is a multiple of the batch size.
+ * Returns the summed per-step error divided by n*batch.
+ */
+float train_network(network *net, data d)
 {
-    assert(d.X.rows % net.batch == 0);
-    int batch = net.batch;
+    assert(d.X.rows % net->batch == 0);
+    int batch = net->batch;
     int n = d.X.rows / batch;

     int i;
     float sum = 0;
     for(i = 0; i < n; ++i){
-        get_next_batch(d, batch, i*batch, net.input, net.truth);
-        float err = train_network_datum(net);
-        sum += err;
+        get_next_batch(d, batch, i*batch, net->input, net->truth);
+        sum += train_network_datum(net);
     }
     return (float)sum/(n*batch);
 }

-void set_temp_network(network net, float t)
+/* Store t in the temperature field of every layer of the network. */
+void set_temp_network(network *net, float t)
 {
     int i;
-    for(i = 0; i < net.n; ++i){
-        net.layers[i].temperature = t;
+    for(i = 0; i < net->n; ++i){
+        layer *l = net->layers + i;
+        l->temperature = t;
     }
 }

@ -395,7 +395,7 @@ int resize_network(network *net, int w, int h)
        h = l.out_h;
        if(l.type == AVGPOOL) break;
    }
-    layer out = get_network_output_layer(*net);
+    layer out = get_network_output_layer(net);
    net->inputs = net->layers[0].inputs;
    net->outputs = out.outputs;
    net->truths = out.outputs;
@ -424,22 +424,22 @@ int resize_network(network *net, int w, int h)
    return 0;
 }

-detection_layer get_network_detection_layer(network net)
+/*
+ * Return a copy of the first layer whose type is DETECTION.  When there is
+ * none, a message is written to stderr and a zero-initialised layer is
+ * returned instead.
+ */
+layer get_network_detection_layer(network *net)
 {
     int i;
-    for(i = 0; i < net.n; ++i){
-        if(net.layers[i].type == DETECTION){
-            return net.layers[i];
-        }
+    for(i = 0; i < net->n; ++i){
+        layer *l = net->layers + i;
+        if(l->type != DETECTION) continue;
+        return *l;
     }
     fprintf(stderr, "Detection layer not found!!\n");
-    detection_layer l = {0};
-    return l;
+    layer none = {0};
+    return none;
 }

-image get_network_image_layer(network net, int i)
+image get_network_image_layer(network *net, int i)
 {
-    layer l = net.layers[i];
+    layer l = net->layers[i];
 #ifdef GPU
    //cuda_pull_array(l.output_gpu, l.output, l.outputs);
 #endif
@ -450,10 +450,10 @@ image get_network_image_layer(network net, int i)
    return def;
 }

-image get_network_image(network net)
+image get_network_image(network *net)
 {
    int i;
-    for(i = net.n-1; i >= 0; --i){
+    for(i = net->n-1; i >= 0; --i){
        image m = get_network_image_layer(net, i);
        if(m.h != 0) return m;
    }
@ -461,37 +461,37 @@ image get_network_image(network net)
    return def;
 }

-void visualize_network(network net)
+/*
+ * Call visualize_convolutional_layer() on each CONVOLUTIONAL layer in
+ * order, labelled "Layer <index>", feeding the value returned by one call
+ * into the next.
+ */
+void visualize_network(network *net)
 {
     image *prev = 0;
     int i;
     char buff[256];
-    for(i = 0; i < net.n; ++i){
-        sprintf(buff, "Layer %d", i);
-        layer l = net.layers[i];
-        if(l.type == CONVOLUTIONAL){
-            prev = visualize_convolutional_layer(l, buff, prev);
-        }
+    for(i = 0; i < net->n; ++i){
+        layer l = net->layers[i];
+        sprintf(buff, "Layer %d", i);
+        if(l.type != CONVOLUTIONAL) continue;
+        prev = visualize_convolutional_layer(l, buff, prev);
     } 
 }

-void top_predictions(network net, int k, int *index)
+/* Forward the network's output buffer and its length to top_k(), together with k and index. */
+void top_predictions(network *net, int k, int *index)
 {
-    top_k(net.output, net.outputs, k, index);
+    float *scores = net->output;
+    int count = net->outputs;
+    top_k(scores, count, k, index);
 }


-float *network_predict(network net, float *input)
+/*
+ * Run a forward pass on input in inference mode.
+ *
+ * The network struct is snapshotted first; input/truth/train/delta are then
+ * overridden (truth and delta cleared, train off), forward_network() is
+ * run, and the snapshot is copied back so the caller's settings survive.
+ * Returns the output pointer as it was read before the struct was restored.
+ */
+float *network_predict(network *net, float *input)
 {
-#ifdef GPU
-    if(gpu_index >= 0)  return network_predict_gpu(net, input);
-#endif
-    net.input = input;
-    net.truth = 0;
-    net.train = 0;
-    net.delta = 0;
+    network saved = *net;
+    net->train = 0;
+    net->truth = 0;
+    net->delta = 0;
+    net->input = input;
     forward_network(net);
-    return net.output;
+    float *result = net->output;
+    *net = saved;
+    return result;
 }

 int num_boxes(network *net)
@ -526,16 +526,11 @@ void network_detect(network *net, image im, float thresh, float hier_thresh, flo
    }
 }

-float *network_predict_p(network *net, float *input)
-{
-    return network_predict(*net, input);
-}
-
 float *network_predict_image(network *net, image im)
 {
    image imr = letterbox_image(im, net->w, net->h);
    set_batch_network(net, 1);
-    float *p = network_predict(*net, imr.data);
+    float *p = network_predict(net, imr.data);
    free_image(imr);
    return p;
 }
@ -543,20 +538,20 @@ float *network_predict_image(network *net, image im)
 int network_width(network *net){return net->w;}
 int network_height(network *net){return net->h;}

-matrix network_predict_data_multi(network net, data test, int n)
+/*
+ * Predict every row of test.X n times and accumulate out/n into pred, so
+ * each row of pred ends up holding the mean of its n predictions.
+ * Rows are staged batch-by-batch in X, one row of test.X.cols floats per
+ * batch slot.
+ */
+matrix network_predict_data_multi(network *net, data test, int n)
 {
     int i,j,b,m;
-    int k = net.outputs;
+    int k = net->outputs;
     matrix pred = make_matrix(test.X.rows, k);
-    float *X = calloc(net.batch*test.X.rows, sizeof(float));
-    for(i = 0; i < test.X.rows; i += net.batch){
-        for(b = 0; b < net.batch; ++b){
+    /* The staging buffer is written at X+b*cols with cols floats per slot,
+     * so it must hold batch*cols floats (it was sized batch*rows, which
+     * overflows whenever the data has fewer rows than columns). */
+    float *X = calloc(net->batch*test.X.cols, sizeof(float));
+    for(i = 0; i < test.X.rows; i += net->batch){
+        for(b = 0; b < net->batch; ++b){
             if(i+b == test.X.rows) break;
             memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
         }
         for(m = 0; m < n; ++m){
             float *out = network_predict(net, X);
-            for(b = 0; b < net.batch; ++b){
+            for(b = 0; b < net->batch; ++b){
                 if(i+b == test.X.rows) break;
                 for(j = 0; j < k; ++j){
                     pred.vals[i+b][j] += out[j+b*k]/n;
@ -568,19 +563,19 @@ matrix network_predict_data_multi(network net, data test, int n)
    return pred;   
 }

-matrix network_predict_data(network net, data test)
+matrix network_predict_data(network *net, data test)
 {
    int i,j,b;
-    int k = net.outputs;
+    int k = net->outputs;
    matrix pred = make_matrix(test.X.rows, k);
-    float *X = calloc(net.batch*test.X.cols, sizeof(float));
-    for(i = 0; i < test.X.rows; i += net.batch){
-        for(b = 0; b < net.batch; ++b){
+    float *X = calloc(net->batch*test.X.cols, sizeof(float));
+    for(i = 0; i < test.X.rows; i += net->batch){
+        for(b = 0; b < net->batch; ++b){
            if(i+b == test.X.rows) break;
            memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
        }
        float *out = network_predict(net, X);
-        for(b = 0; b < net.batch; ++b){
+        for(b = 0; b < net->batch; ++b){
            if(i+b == test.X.rows) break;
            for(j = 0; j < k; ++j){
                pred.vals[i+b][j] = out[j+b*k];
@ -591,11 +586,11 @@ matrix network_predict_data(network net, data test)
    return pred;   
 }

-void print_network(network net)
+void print_network(network *net)
 {
    int i,j;
-    for(i = 0; i < net.n; ++i){
-        layer l = net.layers[i];
+    for(i = 0; i < net->n; ++i){
+        layer l = net->layers[i];
        float *output = l.output;
        int n = l.outputs;
        float mean = mean_array(output, n);
@ -608,7 +603,7 @@ void print_network(network net)
    }
 }

-void compare_networks(network n1, network n2, data test)
+void compare_networks(network *n1, network *n2, data test)
 {
    matrix g1 = network_predict_data(n1, test);
    matrix g2 = network_predict_data(n2, test);
@ -633,7 +628,7 @@ void compare_networks(network n1, network n2, data test)
    printf("%f\n", num/den); 
 }

-float network_accuracy(network net, data d)
+float network_accuracy(network *net, data d)
 {
    matrix guess = network_predict_data(net, d);
    float acc = matrix_topk_accuracy(d.y, guess,1);
@ -641,7 +636,7 @@ float network_accuracy(network net, data d)
    return acc;
 }

-float *network_accuracies(network net, data d, int n)
+float *network_accuracies(network *net, data d, int n)
 {
    static float acc[2];
    matrix guess = network_predict_data(net, d);
@ -651,16 +646,16 @@ float *network_accuracies(network net, data d, int n)
    return acc;
 }

-layer get_network_output_layer(network net)
+/*
+ * Return a copy of the last layer whose type is not COST, scanning from the
+ * end of the layer array.
+ * If the network has no layers, or every layer is a COST layer, a
+ * zero-initialised layer is returned rather than reading layers[-1].
+ */
+layer get_network_output_layer(network *net)
 {
     int i;
-    for(i = net.n - 1; i >= 0; --i){
-        if(net.layers[i].type != COST) break;
+    for(i = net->n - 1; i >= 0; --i){
+        if(net->layers[i].type != COST) break;
     }
-    return net.layers[i];
+    if(i < 0){
+        /* Loop ran off the front: nothing qualifies. */
+        layer none = {0};
+        return none;
+    }
+    return net->layers[i];
 }

-float network_accuracy_multi(network net, data d, int n)
+float network_accuracy_multi(network *net, data d, int n)
 {
    matrix guess = network_predict_data_multi(net, d, n);
    float acc = matrix_topk_accuracy(d.y, guess,1);
@ -668,45 +663,417 @@ float network_accuracy_multi(network net, data d, int n)
    return acc;
 }

-void free_network(network net)
+/*
+ * Release a network: every layer (through free_layer), the layer array,
+ * the host input/truth buffers, the seen/t/cost buffers that
+ * make_network() allocates, the GPU input/truth buffers when built with
+ * GPU, and finally the network struct itself.
+ * A NULL net is ignored.  The pointer must not be used afterwards.
+ * NOTE(review): assumes seen/t/cost are owned by this network and are not
+ * shared with another one -- confirm against callers.
+ */
+void free_network(network *net)
 {
     int i;
-    for(i = 0; i < net.n; ++i){
-        free_layer(net.layers[i]);
+    if(!net) return;
+    for(i = 0; i < net->n; ++i){
+        free_layer(net->layers[i]);
     }
-    free(net.layers);
-    if(net.input) free(net.input);
-    if(net.truth) free(net.truth);
+    free(net->layers);
+    if(net->input) free(net->input);
+    if(net->truth) free(net->truth);
+    /* Allocated in make_network(); these were never released before. */
+    free(net->seen);
+    free(net->t);
+    free(net->cost);
 #ifdef GPU
-    if(net.input_gpu) cuda_free(net.input_gpu);
-    if(net.truth_gpu) cuda_free(net.truth_gpu);
+    if(net->input_gpu) cuda_free(net->input_gpu);
+    if(net->truth_gpu) cuda_free(net->truth_gpu);
 #endif
+    free(net);
 }

 // Some day...
+// NOTE: the purpose of the "Some day..." comment above is unclear; document its intent or remove it.


-layer network_output_layer(network net)
+/*
+ * Return a copy of the last layer whose type is not COST, scanning from the
+ * end of the layer array.
+ * If the network has no layers, or every layer is a COST layer, a
+ * zero-initialised layer is returned rather than reading layers[-1].
+ */
+layer network_output_layer(network *net)
 {
     int i;
-    for(i = net.n - 1; i >= 0; --i){
-        if(net.layers[i].type != COST) break;
+    for(i = net->n - 1; i >= 0; --i){
+        if(net->layers[i].type != COST) break;
     }
-    return net.layers[i];
+    if(i < 0){
+        /* Loop ran off the front: nothing qualifies. */
+        layer none = {0};
+        return none;
+    }
+    return net->layers[i];
 }

-int network_inputs(network net)
+/* Return the inputs field of the network's first layer. */
+int network_inputs(network *net)
 {
-    return net.layers[0].inputs;
+    layer *first = net->layers;
+    return first->inputs;
 }

-int network_outputs(network net)
+/* Return the outputs field of the layer chosen by network_output_layer(). */
+int network_outputs(network *net)
 {
-    return network_output_layer(net).outputs;
+    layer last = network_output_layer(net);
+    return last.outputs;
 }

-float *network_output(network net)
+/* Return the output pointer of the layer chosen by network_output_layer(). */
+float *network_output(network *net)
 {
-    return network_output_layer(net).output;
+    layer last = network_output_layer(net);
+    return last.output;
 }

+#ifdef GPU
+
+/*
+ * GPU forward pass.
+ *
+ * Works on a local copy of *netp: selects the network's device, pushes the
+ * host input (and truth, when set) to the GPU, then calls each layer's
+ * forward_gpu in order.  Before a layer runs its delta_gpu buffer, if any,
+ * is zero-filled; afterwards the copy's input pointers are rebound to that
+ * layer's output, and likewise the truth pointers when the layer's truth
+ * flag is set.  Finishes with pull_network_output() and
+ * calc_network_cost() on the caller's network.
+ */
+void forward_network_gpu(network *netp)
+{
+    network state = *netp;
+    int idx;
+
+    cuda_set_device(state.gpu_index);
+    cuda_push_array(state.input_gpu, state.input, state.inputs*state.batch);
+    if(state.truth) cuda_push_array(state.truth_gpu, state.truth, state.truths*state.batch);
+
+    for(idx = 0; idx < state.n; ++idx){
+        layer cur = state.layers[idx];
+        state.index = idx;
+        if(cur.delta_gpu) fill_gpu(cur.outputs * cur.batch, 0, cur.delta_gpu, 1);
+        cur.forward_gpu(cur, state);
+        state.input = cur.output;
+        state.input_gpu = cur.output_gpu;
+        if(!cur.truth) continue;
+        state.truth = cur.output;
+        state.truth_gpu = cur.output_gpu;
+    }
+    pull_network_output(netp);
+    calc_network_cost(netp);
+}
+
+/*
+ * GPU backward pass.
+ *
+ * Walks the layers from last to first on a local copy of *netp, stopping
+ * at the first layer whose stopbackward flag is set.  For every layer but
+ * the first, the copy's input/delta pointers (host and GPU) are pointed at
+ * the preceding layer's output/delta; for layer 0 the copy is reset to the
+ * original network so the network's own input/delta are used.
+ */
+void backward_network_gpu(network *netp)
+{
+    network orig = *netp;
+    network state = orig;
+    int idx = state.n;
+
+    cuda_set_device(state.gpu_index);
+    while(--idx >= 0){
+        layer cur = state.layers[idx];
+        if(cur.stopbackward) break;
+        if(idx > 0){
+            layer below = state.layers[idx-1];
+            state.input = below.output;
+            state.input_gpu = below.output_gpu;
+            state.delta = below.delta;
+            state.delta_gpu = below.delta_gpu;
+        }else{
+            state = orig;
+        }
+        state.index = idx;
+        cur.backward_gpu(cur, state);
+    }
+}
+
+/*
+ * Apply one GPU weight update to every layer that has an update_gpu hook.
+ *
+ * The update arguments are built from the network's hyper-parameters, with
+ * the batch size scaled by subdivisions and the learning rate taken from
+ * get_current_rate().  The shared step counter *t is incremented and the
+ * new value passed along as a.t.
+ */
+void update_network_gpu(network *netp)
+{
+    network state = *netp;
+    update_args args = {0};
+    int idx;
+
+    cuda_set_device(state.gpu_index);
+    args.batch = state.batch*state.subdivisions;
+    args.learning_rate = get_current_rate(netp);
+    args.momentum = state.momentum;
+    args.decay = state.decay;
+    args.adam = state.adam;
+    args.B1 = state.B1;
+    args.B2 = state.B2;
+    args.eps = state.eps;
+    *state.t += 1;
+    args.t = *state.t;
+
+    for(idx = 0; idx < state.n; ++idx){
+        layer cur = state.layers[idx];
+        if(!cur.update_gpu) continue;
+        cur.update_gpu(cur, args);
+    }
+}
+
+/*
+ * Zero-fill the GPU weight, bias and scale update buffers of every layer
+ * (each only when the buffer pointer is non-NULL) on the network's device.
+ */
+void harmless_update_network_gpu(network *netp)
+{
+    network state = *netp;
+    int idx;
+
+    cuda_set_device(state.gpu_index);
+    for(idx = 0; idx < state.n; ++idx){
+        layer cur = state.layers[idx];
+        if(cur.weight_updates_gpu){
+            fill_gpu(cur.nweights, 0, cur.weight_updates_gpu, 1);
+        }
+        if(cur.bias_updates_gpu){
+            fill_gpu(cur.nbiases, 0, cur.bias_updates_gpu, 1);
+        }
+        if(cur.scale_updates_gpu){
+            fill_gpu(cur.nbiases, 0, cur.scale_updates_gpu, 1);
+        }
+    }
+}
+
+typedef struct { /* Argument bundle handed to train_thread() through pthread_create(). */
+    network *net; /* network the thread trains */
+    data d; /* data passed to train_network() */
+    float *err; /* where the thread stores train_network()'s return value */
+} train_args;
+
+/*
+ * Thread entry point: unpack the heap-allocated train_args in ptr, free
+ * it, select the network's GPU, run train_network() and store its result
+ * through the err pointer.  Always returns 0.
+ */
+void *train_thread(void *ptr)
+{
+    train_args *packed = (train_args *)ptr;
+    network *net = packed->net;
+    data d = packed->d;
+    float *err = packed->err;
+    free(ptr);
+
+    cuda_set_device(net->gpu_index);
+    *err = train_network(net, d);
+    return 0;
+}
+
+/*
+ * Start a thread that runs train_thread() on (net, d) and writes the
+ * resulting error to *err.
+ *
+ * The argument bundle is heap-allocated here and freed by train_thread().
+ * Allocation or thread-creation failure is reported through error().
+ * Returns the handle of the new thread; the caller is expected to join it.
+ */
+pthread_t train_network_in_thread(network *net, data d, float *err)
+{
+    pthread_t thread;
+    train_args *ptr = calloc(1, sizeof(train_args));
+    /* Previously a failed allocation was dereferenced on the next line. */
+    if(!ptr) error("calloc failed");
+    ptr->net = net;
+    ptr->d = d;
+    ptr->err = err;
+    if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed");
+    return thread;
+}
+
+/*
+ * Add the weights that pull_weights() stored in l's *_updates host buffers
+ * onto base's host weights (biases, weights and, when l has scales, the
+ * scales).  Used by sync_layer() to sum the weights of all devices.
+ *
+ * DECONVOLUTIONAL layers are handled together with CONVOLUTIONAL ones,
+ * matching pull_weights() and distribute_weights(); previously they were
+ * skipped here, so sync_layer() distributed base's un-averaged host copy
+ * over every device's deconvolutional weights.
+ * Layers of any other type except CONNECTED are left untouched.
+ */
+void merge_weights(layer l, layer base)
+{
+    if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) {
+        axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1);
+        axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1);
+        if (l.scales) {
+            axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1);
+        }
+    } else if(l.type == CONNECTED) {
+        axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1);
+        axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1);
+    }
+}
+
+/*
+ * Multiply l's host biases, weights and (when present) scales by s.
+ * sync_layer() calls this with 0 to clear the accumulator and with 1/n to
+ * turn the sum built by merge_weights() into a mean.
+ *
+ * Covers the same layer types as merge_weights() -- CONVOLUTIONAL,
+ * DECONVOLUTIONAL and CONNECTED -- so a layer that is merged is always
+ * cleared and averaged as well.
+ */
+void scale_weights(layer l, float s)
+{
+    if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) {
+        scal_cpu(l.n, s, l.biases, 1);
+        scal_cpu(l.nweights, s, l.weights, 1);
+        if (l.scales) {
+            scal_cpu(l.n, s, l.scales, 1);
+        }
+    } else if(l.type == CONNECTED) {
+        scal_cpu(l.outputs, s, l.biases, 1);
+        scal_cpu(l.outputs*l.inputs, s, l.weights, 1);
+    }
+}
+
+
+/*
+ * Copy a layer's GPU biases/weights (and scales, when the layer has them)
+ * into its host *_updates buffers.  Note the destination: the host weight
+ * buffers themselves are not written.  Only CONVOLUTIONAL, DECONVOLUTIONAL
+ * and CONNECTED layers are handled.
+ */
+void pull_weights(layer l)
+{
+    switch(l.type){
+        case CONVOLUTIONAL:
+        case DECONVOLUTIONAL:
+            cuda_pull_array(l.biases_gpu, l.bias_updates, l.n);
+            cuda_pull_array(l.weights_gpu, l.weight_updates, l.nweights);
+            if(l.scales){
+                cuda_pull_array(l.scales_gpu, l.scale_updates, l.n);
+            }
+            break;
+        case CONNECTED:
+            cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs);
+            cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs);
+            break;
+        default:
+            break;
+    }
+}
+
+/*
+ * Copy a layer's host biases/weights (and scales, when the layer has them)
+ * to its GPU buffers.  Only CONVOLUTIONAL, DECONVOLUTIONAL and CONNECTED
+ * layers are handled.
+ */
+void push_weights(layer l)
+{
+    switch(l.type){
+        case CONVOLUTIONAL:
+        case DECONVOLUTIONAL:
+            cuda_push_array(l.biases_gpu, l.biases, l.n);
+            cuda_push_array(l.weights_gpu, l.weights, l.nweights);
+            if(l.scales){
+                cuda_push_array(l.scales_gpu, l.scales, l.n);
+            }
+            break;
+        case CONNECTED:
+            cuda_push_array(l.biases_gpu, l.biases, l.outputs);
+            cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs);
+            break;
+        default:
+            break;
+    }
+}
+
+/*
+ * Copy base's host biases/weights (and scales, when base has them) into
+ * l's GPU buffers, using l's sizes.  Only CONVOLUTIONAL, DECONVOLUTIONAL
+ * and CONNECTED layers are handled.
+ */
+void distribute_weights(layer l, layer base)
+{
+    switch(l.type){
+        case CONVOLUTIONAL:
+        case DECONVOLUTIONAL:
+            cuda_push_array(l.biases_gpu, base.biases, l.n);
+            cuda_push_array(l.weights_gpu, base.weights, l.nweights);
+            if(base.scales){
+                cuda_push_array(l.scales_gpu, base.scales, l.n);
+            }
+            break;
+        case CONNECTED:
+            cuda_push_array(l.biases_gpu, base.biases, l.outputs);
+            cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs);
+            break;
+        default:
+            break;
+    }
+}
+
+
+/*
+
+   void pull_updates(layer l)
+   {
+   if(l.type == CONVOLUTIONAL){
+   cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
+   cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
+   if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n);
+   } else if(l.type == CONNECTED){
+   cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
+   cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
+   }
+   }
+
+   void push_updates(layer l)
+   {
+   if(l.type == CONVOLUTIONAL){
+   cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
+   cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
+   if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n);
+   } else if(l.type == CONNECTED){
+   cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
+   cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
+   }
+   }
+
+   void update_layer(layer l, network net)
+   {
+   int update_batch = net.batch*net.subdivisions;
+   float rate = get_current_rate(net);
+   l.t = get_current_batch(net);
+   if(l.update_gpu){
+   l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
+   }
+   }
+   void merge_updates(layer l, layer base)
+   {
+   if (l.type == CONVOLUTIONAL) {
+   axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1);
+   axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1);
+   if (l.scale_updates) {
+   axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1);
+   }
+   } else if(l.type == CONNECTED) {
+   axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1);
+   axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1);
+   }
+   }
+
+   void distribute_updates(layer l, layer base)
+   {
+   if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
+   cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n);
+   cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights);
+   if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n);
+   } else if(l.type == CONNECTED){
+   cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs);
+   cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs);
+   }
+   }
+ */
+
+/*
+   void sync_layer(network *nets, int n, int j)
+   {
+   int i;
+   network net = nets[0];
+   layer base = net.layers[j];
+   scale_weights(base, 0);
+   for (i = 0; i < n; ++i) {
+   cuda_set_device(nets[i].gpu_index);
+   layer l = nets[i].layers[j];
+   pull_weights(l);
+   merge_weights(l, base);
+   }
+   scale_weights(base, 1./n);
+   for (i = 0; i < n; ++i) {
+   cuda_set_device(nets[i].gpu_index);
+   layer l = nets[i].layers[j];
+   distribute_weights(l, base);
+   }
+   }
+ */
+
+/*
+ * Average layer j across n networks.
+ *
+ * nets[0]'s host copy of the layer is the accumulator: it is scaled by 0,
+ * every network's GPU weights are pulled and merged into it, and the sum
+ * is scaled by 1/n.  The result is then pushed to every network's GPU
+ * through distribute_weights().  The matching device is selected before
+ * each per-network transfer.
+ */
+void sync_layer(network **nets, int n, int j)
+{
+    layer base = nets[0]->layers[j];
+    int k;
+
+    scale_weights(base, 0);
+    for (k = 0; k < n; ++k) {
+        network *peer = nets[k];
+        cuda_set_device(peer->gpu_index);
+        layer mine = peer->layers[j];
+        pull_weights(mine);
+        merge_weights(mine, base);
+    }
+    scale_weights(base, 1./n);
+    for (k = 0; k < n; ++k) {
+        network *peer = nets[k];
+        cuda_set_device(peer->gpu_index);
+        layer mine = peer->layers[j];
+        distribute_weights(mine, base);
+    }
+}
+
+typedef struct{ /* Argument bundle handed to sync_layer_thread() through pthread_create(). */
+    network **nets; /* forwarded to sync_layer() as nets */
+    int n; /* forwarded to sync_layer() as n */
+    int j; /* forwarded to sync_layer() as j */
+} sync_args;
+
+/*
+ * Thread entry point: run sync_layer() with the fields of the
+ * heap-allocated sync_args in ptr, then free it.  Always returns 0.
+ */
+void *sync_layer_thread(void *ptr)
+{
+    sync_args *packed = (sync_args *)ptr;
+    network **nets = packed->nets;
+    int n = packed->n;
+    int j = packed->j;
+
+    sync_layer(nets, n, j);
+    free(ptr);
+    return 0;
+}
+
+/*
+ * Start a thread that runs sync_layer(nets, n, j) via sync_layer_thread().
+ *
+ * The argument bundle is heap-allocated here and freed by the thread.
+ * Allocation or thread-creation failure is reported through error().
+ * Returns the handle of the new thread; the caller is expected to join it.
+ */
+pthread_t sync_layer_in_thread(network **nets, int n, int j)
+{
+    pthread_t thread;
+    sync_args *ptr = calloc(1, sizeof(sync_args));
+    /* Previously a failed allocation was dereferenced on the next line. */
+    if(!ptr) error("calloc failed");
+    ptr->nets = nets;
+    ptr->n = n;
+    ptr->j = j;
+    if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed");
+    return thread;
+}
+
+/*
+ * Synchronise n networks.
+ *
+ * nets[0]'s seen counter is advanced by interval*(n-1)*batch*subdivisions
+ * and the result copied to every network.  Then one thread per layer index
+ * runs sync_layer(), and all of them are joined before returning.
+ * Failure to allocate the thread-handle array is reported through error().
+ */
+void sync_nets(network **nets, int n, int interval)
+{
+    int j;
+    int layers = nets[0]->n;
+    pthread_t *threads = calloc(layers, sizeof(pthread_t));
+    /* calloc(0, ...) may return NULL, so only a non-empty request can fail. */
+    if(layers > 0 && !threads) error("calloc failed");
+
+    /* Multiply in size_t so the product cannot overflow int before it is
+     * added to the counter.
+     * NOTE(review): assumes *seen is a size_t, as make_network()'s
+     * sizeof(size_t) allocation suggests. */
+    *(nets[0]->seen) += (size_t)interval * (n-1) * nets[0]->batch * nets[0]->subdivisions;
+    for (j = 0; j < n; ++j){
+        *(nets[j]->seen) = *(nets[0]->seen);
+    }
+    for (j = 0; j < layers; ++j) {
+        threads[j] = sync_layer_in_thread(nets, n, j);
+    }
+    for (j = 0; j < layers; ++j) {
+        pthread_join(threads[j], 0);
+    }
+    free(threads);
+}
+
+/*
+ * Train n networks in parallel on one chunk of data.
+ *
+ * d must hold exactly batch*subdivisions*n rows (asserted).  Network i is
+ * trained in its own thread on part i of n, as returned by
+ * get_data_part().  After all threads are joined, the networks are
+ * synchronised with sync_nets() whenever nets[0]'s current batch number is
+ * a multiple of interval.
+ * An interval of 0 disables synchronisation; it used to be a modulo by
+ * zero.
+ *
+ * Returns the mean of the per-network errors.  Allocation failures are
+ * reported through error().
+ */
+float train_networks(network **nets, int n, data d, int interval)
+{
+    int i;
+    int batch = nets[0]->batch;
+    int subdivisions = nets[0]->subdivisions;
+    assert(batch * subdivisions * n == d.X.rows);
+    pthread_t *threads = calloc(n, sizeof(pthread_t));
+    float *errors = calloc(n, sizeof(float));
+    /* calloc(0, ...) may return NULL, so only a non-empty request can fail. */
+    if(n > 0 && (!threads || !errors)) error("calloc failed");
+
+    float sum = 0;
+    for(i = 0; i < n; ++i){
+        data p = get_data_part(d, i, n);
+        threads[i] = train_network_in_thread(nets[i], p, errors + i);
+    }
+    for(i = 0; i < n; ++i){
+        pthread_join(threads[i], 0);
+        //printf("%f\n", errors[i]);
+        sum += errors[i];
+    }
+    //cudaDeviceSynchronize();
+    if (interval != 0 && get_current_batch(nets[0]) % interval == 0) {
+        printf("Syncing... ");
+        fflush(stdout);
+        sync_nets(nets, n, interval);
+        printf("Done!\n");
+    }
+    //cudaDeviceSynchronize();
+    free(threads);
+    free(errors);
+    return (float)sum/(n);
+}
+
+/*
+ * Copy the GPU output of the layer chosen by get_network_output_layer()
+ * into that layer's host output buffer (outputs*batch values).
+ */
+void pull_network_output(network *net)
+{
+    layer last = get_network_output_layer(net);
+    cuda_pull_array(last.output_gpu, last.output, last.outputs*last.batch);
+}
+
+#endif
--- a/src/network.h
+++ b/src/network.h
@ -10,22 +10,20 @@


 #ifdef GPU
-float train_network_datum_gpu(network net);
-float *network_predict_gpu(network net, float *input);
-void pull_network_output(network net);
+void pull_network_output(network *net);
 #endif

-void compare_networks(network n1, network n2, data d);
+void compare_networks(network *n1, network *n2, data d);
 char *get_layer_string(LAYER_TYPE a);

-network make_network(int n);
+network *make_network(int n);


-float network_accuracy_multi(network net, data d, int n);
-int get_predicted_class_network(network net);
-void print_network(network net);
+float network_accuracy_multi(network *net, data d, int n);
+int get_predicted_class_network(network *net);
+void print_network(network *net);
 int resize_network(network *net, int w, int h);
-void calc_network_cost(network net);
+void calc_network_cost(network *net);

 #endif

--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@ -1,422 +0,0 @@
-#include "cuda_runtime.h"
-#include "curand.h"
-#include "cublas_v2.h"
-
-extern "C" {
-#include <stdio.h>
-#include <time.h>
-#include <assert.h>
-
-#include "network.h"
-#include "data.h"
-#include "utils.h"
-#include "parser.h"
-
-#include "crop_layer.h"
-#include "connected_layer.h"
-#include "rnn_layer.h"
-#include "gru_layer.h"
-#include "crnn_layer.h"
-#include "detection_layer.h"
-#include "region_layer.h"
-#include "convolutional_layer.h"
-#include "activation_layer.h"
-#include "maxpool_layer.h"
-#include "reorg_layer.h"
-#include "avgpool_layer.h"
-#include "normalization_layer.h"
-#include "batchnorm_layer.h"
-#include "cost_layer.h"
-#include "local_layer.h"
-#include "softmax_layer.h"
-#include "dropout_layer.h"
-#include "route_layer.h"
-#include "shortcut_layer.h"
-#include "blas.h"
-}
-
-void forward_network_gpu(network net)
-{
-    int i;
-    for(i = 0; i < net.n; ++i){
-        net.index = i;
-        layer l = net.layers[i];
-        if(l.delta_gpu){
-            fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
-        }
-        l.forward_gpu(l, net);
-        net.input_gpu = l.output_gpu;
-        net.input = l.output;
-        if(l.truth) {
-            net.truth_gpu = l.output_gpu;
-            net.truth = l.output;
-        }
-    }
-    pull_network_output(net);
-    calc_network_cost(net);
-}
-
-void backward_network_gpu(network net)
-{
-    int i;
-    network orig = net;
-    for(i = net.n-1; i >= 0; --i){
-        layer l = net.layers[i];
-        if(l.stopbackward) break;
-        if(i == 0){
-            net = orig;
-        }else{
-            layer prev = net.layers[i-1];
-            net.input = prev.output;
-            net.delta = prev.delta;
-            net.input_gpu = prev.output_gpu;
-            net.delta_gpu = prev.delta_gpu;
-        }
-        net.index = i;
-        l.backward_gpu(l, net);
-    }
-}
-
-void update_network_gpu(network net)
-{
-    cuda_set_device(net.gpu_index);
-    int i;
-    update_args a = {0};
-    a.batch = net.batch*net.subdivisions;
-    a.learning_rate = get_current_rate(net);
-    a.momentum = net.momentum;
-    a.decay = net.decay;
-    a.adam = net.adam;
-    a.B1 = net.B1;
-    a.B2 = net.B2;
-    a.eps = net.eps;
-    ++*net.t;
-    a.t = (*net.t);
-
-    for(i = 0; i < net.n; ++i){
-        layer l = net.layers[i];
-        if(l.update_gpu){
-            l.update_gpu(l, a);
-        }
-    }
-}
-
-void harmless_update_network_gpu(network net)
-{
-    cuda_set_device(net.gpu_index);
-    int i;
-    for(i = 0; i < net.n; ++i){
-        layer l = net.layers[i];
-        if(l.weight_updates_gpu) fill_gpu(l.nweights, 0, l.weight_updates_gpu, 1);
-        if(l.bias_updates_gpu) fill_gpu(l.nbiases, 0, l.bias_updates_gpu, 1);
-        if(l.scale_updates_gpu) fill_gpu(l.nbiases, 0, l.scale_updates_gpu, 1);
-    }
-}
-
-float train_network_datum_gpu(network net)
-{
-    *net.seen += net.batch;
-
-    int x_size = net.inputs*net.batch;
-    int y_size = net.truths*net.batch;
-    cuda_push_array(net.input_gpu, net.input, x_size);
-    cuda_push_array(net.truth_gpu, net.truth, y_size);
-
-    net.train = 1;
-    forward_network_gpu(net);
-    backward_network_gpu(net);
-
-    float error = *net.cost;
-    if (((*net.seen) / net.batch) % net.subdivisions == 0) update_network_gpu(net);
-
-    return error;
-}
-
-typedef struct {
-    network net;
-    data d;
-    float *err;
-} train_args;
-
-void *train_thread(void *ptr)
-{
-    train_args args = *(train_args*)ptr;
-    free(ptr);
-    cuda_set_device(args.net.gpu_index);
-    *args.err = train_network(args.net, args.d);
-    return 0;
-}
-
-pthread_t train_network_in_thread(network net, data d, float *err)
-{
-    pthread_t thread;
-    train_args *ptr = (train_args *)calloc(1, sizeof(train_args));
-    ptr->net = net;
-    ptr->d = d;
-    ptr->err = err;
-    if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed");
-    return thread;
-}
-
-void merge_weights(layer l, layer base)
-{
-    if (l.type == CONVOLUTIONAL) {
-        axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1);
-        axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1);
-        if (l.scales) {
-            axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1);
-        }
-    } else if(l.type == CONNECTED) {
-        axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1);
-        axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1);
-    }
-}
-
-void scale_weights(layer l, float s)
-{
-    if (l.type == CONVOLUTIONAL) {
-        scal_cpu(l.n, s, l.biases, 1);
-        scal_cpu(l.nweights, s, l.weights, 1);
-        if (l.scales) {
-            scal_cpu(l.n, s, l.scales, 1);
-        }
-    } else if(l.type == CONNECTED) {
-        scal_cpu(l.outputs, s, l.biases, 1);
-        scal_cpu(l.outputs*l.inputs, s, l.weights, 1);
-    }
-}
-
-
-void pull_weights(layer l)
-{
-    if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
-        cuda_pull_array(l.biases_gpu, l.bias_updates, l.n);
-        cuda_pull_array(l.weights_gpu, l.weight_updates, l.nweights);
-        if(l.scales) cuda_pull_array(l.scales_gpu, l.scale_updates, l.n);
-    } else if(l.type == CONNECTED){
-        cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs);
-        cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs);
-    }
-}
-
-void push_weights(layer l)
-{
-    if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
-        cuda_push_array(l.biases_gpu, l.biases, l.n);
-        cuda_push_array(l.weights_gpu, l.weights, l.nweights);
-        if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n);
-    } else if(l.type == CONNECTED){
-        cuda_push_array(l.biases_gpu, l.biases, l.outputs);
-        cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs);
-    }
-}
-
-void distribute_weights(layer l, layer base)
-{
-    if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) {
-        cuda_push_array(l.biases_gpu, base.biases, l.n);
-        cuda_push_array(l.weights_gpu, base.weights, l.nweights);
-        if (base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n);
-    } else if (l.type == CONNECTED) {
-        cuda_push_array(l.biases_gpu, base.biases, l.outputs);
-        cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs);
-    }
-}
-
-
-/*
-
-void pull_updates(layer l)
-{
-    if(l.type == CONVOLUTIONAL){
-        cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
-        cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
-        if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n);
-    } else if(l.type == CONNECTED){
-        cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
-        cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
-    }
-}
-
-void push_updates(layer l)
-{
-    if(l.type == CONVOLUTIONAL){
-        cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
-        cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
-        if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n);
-    } else if(l.type == CONNECTED){
-        cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
-        cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
-    }
-}
-
-void update_layer(layer l, network net)
-{
-    int update_batch = net.batch*net.subdivisions;
-    float rate = get_current_rate(net);
-    l.t = get_current_batch(net);
-    if(l.update_gpu){
-        l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
-    }
-}
-void merge_updates(layer l, layer base)
-{
-    if (l.type == CONVOLUTIONAL) {
-        axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1);
-        axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1);
-        if (l.scale_updates) {
-            axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1);
-        }
-    } else if(l.type == CONNECTED) {
-        axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1);
-        axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1);
-    }
-}
-
-void distribute_updates(layer l, layer base)
-{
-    if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
-        cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n);
-        cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights);
-        if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n);
-    } else if(l.type == CONNECTED){
-        cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs);
-        cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs);
-    }
-}
-*/
-
-/*
-void sync_layer(network *nets, int n, int j)
-{
-    int i;
-    network net = nets[0];
-    layer base = net.layers[j];
-    scale_weights(base, 0);
-    for (i = 0; i < n; ++i) {
-        cuda_set_device(nets[i].gpu_index);
-        layer l = nets[i].layers[j];
-        pull_weights(l);
-        merge_weights(l, base);
-    }
-    scale_weights(base, 1./n);
-    for (i = 0; i < n; ++i) {
-        cuda_set_device(nets[i].gpu_index);
-        layer l = nets[i].layers[j];
-        distribute_weights(l, base);
-    }
-}
-*/
-
-void sync_layer(network *nets, int n, int j)
-{
-    int i;
-    network net = nets[0];
-    layer base = net.layers[j];
-    scale_weights(base, 0);
-    for (i = 0; i < n; ++i) {
-        cuda_set_device(nets[i].gpu_index);
-        layer l = nets[i].layers[j];
-        pull_weights(l);
-        merge_weights(l, base);
-    }
-    scale_weights(base, 1./n);
-    for (i = 0; i < n; ++i) {
-        cuda_set_device(nets[i].gpu_index);
-        layer l = nets[i].layers[j];
-        distribute_weights(l, base);
-    }
-}
-
-typedef struct{
-    network *nets;
-    int n;
-    int j;
-} sync_args;
-
-void *sync_layer_thread(void *ptr)
-{
-    sync_args args = *(sync_args*)ptr;
-    sync_layer(args.nets, args.n, args.j);
-    free(ptr);
-    return 0;
-}
-
-pthread_t sync_layer_in_thread(network *nets, int n, int j)
-{
-    pthread_t thread;
-    sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args));
-    ptr->nets = nets;
-    ptr->n = n;
-    ptr->j = j;
-    if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed");
-    return thread;
-}
-
-void sync_nets(network *nets, int n, int interval)
-{
-    int j;
-    int layers = nets[0].n;
-    pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t));
-
-    *nets[0].seen += interval * (n-1) * nets[0].batch * nets[0].subdivisions;
-    for (j = 0; j < n; ++j){
-        *nets[j].seen = *nets[0].seen;
-    }
-    for (j = 0; j < layers; ++j) {
-        threads[j] = sync_layer_in_thread(nets, n, j);
-    }
-    for (j = 0; j < layers; ++j) {
-        pthread_join(threads[j], 0);
-    }
-    free(threads);
-}
-
-float train_networks(network *nets, int n, data d, int interval)
-{
-    int i;
-    int batch = nets[0].batch;
-    int subdivisions = nets[0].subdivisions;
-    assert(batch * subdivisions * n == d.X.rows);
-    pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
-    float *errors = (float *) calloc(n, sizeof(float));
-
-    float sum = 0;
-    for(i = 0; i < n; ++i){
-        data p = get_data_part(d, i, n);
-        threads[i] = train_network_in_thread(nets[i], p, errors + i);
-    }
-    for(i = 0; i < n; ++i){
-        pthread_join(threads[i], 0);
-        //printf("%f\n", errors[i]);
-        sum += errors[i];
-    }
-    //cudaDeviceSynchronize();
-    if (get_current_batch(nets[0]) % interval == 0) {
-        printf("Syncing... ");
-        fflush(stdout);
-        sync_nets(nets, n, interval);
-        printf("Done!\n");
-    }
-    //cudaDeviceSynchronize();
-    free(threads);
-    free(errors);
-    return (float)sum/(n);
-}
-
-void pull_network_output(network net)
-{
-    layer l = get_network_output_layer(net);
-    cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
-}
-
-float *network_predict_gpu(network net, float *input)
-{
-    cuda_set_device(net.gpu_index);
-    cuda_push_array(net.input_gpu, input, net.inputs*net.batch);
-    net.truth = 0;
-    net.train = 0;
-    forward_network_gpu(net);
-    return net.output;
-}
-
--- a/src/parser.c
+++ b/src/parser.c
@ -116,7 +116,7 @@ typedef struct size_params{
    int c;
    int index;
    int time_steps;
-    network net;
+    network *net;
 } size_params;

 local_layer parse_local(list *options, size_params params)
@ -160,7 +160,7 @@ layer parse_deconvolutional(list *options, size_params params)
    int padding = option_find_int_quiet(options, "padding",0);
    if(pad) padding = size/2;

-    layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net.adam);
+    layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam);

    return l;
 }
@ -189,7 +189,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
    int binary = option_find_int_quiet(options, "binary", 0);
    int xnor = option_find_int_quiet(options, "xnor", 0);

-    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net.adam);
+    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam);
    layer.flipped = option_find_int_quiet(options, "flipped", 0);
    layer.dot = option_find_float_quiet(options, "dot", 0);

@ -218,7 +218,7 @@ layer parse_rnn(list *options, size_params params)
    ACTIVATION activation = get_activation(activation_s);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

-    layer l = make_rnn_layer(params.batch, params.inputs, output, params.time_steps, activation, batch_normalize, params.net.adam);
+    layer l = make_rnn_layer(params.batch, params.inputs, output, params.time_steps, activation, batch_normalize, params.net->adam);

    l.shortcut = option_find_int_quiet(options, "shortcut", 0);

@ -230,7 +230,7 @@ layer parse_gru(list *options, size_params params)
    int output = option_find_int(options, "output",1);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

-    layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net.adam);
+    layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam);
    l.tanh = option_find_int_quiet(options, "tanh", 0);

    return l;
@ -241,7 +241,7 @@ layer parse_lstm(list *options, size_params params)
    int output = option_find_int(options, "output", 1);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

-    layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net.adam);
+    layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam);

    return l;
 }
@ -253,7 +253,7 @@ layer parse_connected(list *options, size_params params)
    ACTIVATION activation = get_activation(activation_s);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

-    layer l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net.adam);
+    layer l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam);
    return l;
 }

@ -456,14 +456,14 @@ layer parse_batchnorm(list *options, size_params params)
    return l;
 }

-layer parse_shortcut(list *options, size_params params, network net)
+layer parse_shortcut(list *options, size_params params, network *net)
 {
    char *l = option_find(options, "from");
    int index = atoi(l);
    if(index < 0) index = params.index + index;

    int batch = params.batch;
-    layer from = net.layers[index];
+    layer from = net->layers[index];

    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);

@ -491,7 +491,7 @@ layer parse_activation(list *options, size_params params)
    return l;
 }

-route_layer parse_route(list *options, size_params params, network net)
+route_layer parse_route(list *options, size_params params, network *net)
 {
    char *l = option_find(options, "layers");
    int len = strlen(l);
@ -509,19 +509,19 @@ route_layer parse_route(list *options, size_params params, network net)
        l = strchr(l, ',')+1;
        if(index < 0) index = params.index + index;
        layers[i] = index;
-        sizes[i] = net.layers[index].outputs;
+        sizes[i] = net->layers[index].outputs;
    }
    int batch = params.batch;

    route_layer layer = make_route_layer(batch, n, layers, sizes);

-    convolutional_layer first = net.layers[layers[0]];
+    convolutional_layer first = net->layers[layers[0]];
    layer.out_w = first.out_w;
    layer.out_h = first.out_h;
    layer.out_c = first.out_c;
    for(i = 1; i < n; ++i){
        int index = layers[i];
-        convolutional_layer next = net.layers[index];
+        convolutional_layer next = net->layers[index];
        if(next.out_w == first.out_w && next.out_h == first.out_h){
            layer.out_c += next.out_c;
        }else{
@ -557,6 +557,7 @@ void parse_net_options(list *options, network *net)
    net->batch /= subdivs;
    net->batch *= net->time_steps;
    net->subdivisions = subdivs;
+    net->random = option_find_int_quiet(options, "random", 0);

    net->adam = option_find_int_quiet(options, "adam", 0);
    if(net->adam){
@ -571,6 +572,8 @@ void parse_net_options(list *options, network *net)
    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
+    net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w);
+    net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w);
    net->center = option_find_int_quiet(options, "center",0);

    net->angle = option_find_float_quiet(options, "angle", 0);
@ -628,26 +631,26 @@ int is_network(section *s)
            || strcmp(s->type, "[network]")==0);
 }

-network parse_network_cfg(char *filename)
+network *parse_network_cfg(char *filename)
 {
    list *sections = read_cfg(filename);
    node *n = sections->front;
    if(!n) error("Config file has no sections");
-    network net = make_network(sections->size - 1);
-    net.gpu_index = gpu_index;
+    network *net = make_network(sections->size - 1);
+    net->gpu_index = gpu_index;
    size_params params;

    section *s = (section *)n->val;
    list *options = s->options;
    if(!is_network(s)) error("First section must be [net] or [network]");
-    parse_net_options(options, &net);
+    parse_net_options(options, net);

-    params.h = net.h;
-    params.w = net.w;
-    params.c = net.c;
-    params.inputs = net.inputs;
-    params.batch = net.batch;
-    params.time_steps = net.time_steps;
+    params.h = net->h;
+    params.w = net->w;
+    params.c = net->c;
+    params.inputs = net->inputs;
+    params.batch = net->batch;
+    params.time_steps = net->time_steps;
    params.net = net;

    size_t workspace_size = 0;
@ -690,7 +693,7 @@ network parse_network_cfg(char *filename)
            l = parse_detection(options, params);
        }else if(lt == SOFTMAX){
            l = parse_softmax(options, params);
-            net.hierarchy = l.softmax_tree;
+            net->hierarchy = l.softmax_tree;
        }else if(lt == NORMALIZATION){
            l = parse_normalization(options, params);
        }else if(lt == BATCHNORM){
@ -707,11 +710,11 @@ network parse_network_cfg(char *filename)
            l = parse_shortcut(options, params, net);
        }else if(lt == DROPOUT){
            l = parse_dropout(options, params);
-            l.output = net.layers[count-1].output;
-            l.delta = net.layers[count-1].delta;
+            l.output = net->layers[count-1].output;
+            l.delta = net->layers[count-1].delta;
 #ifdef GPU
-            l.output_gpu = net.layers[count-1].output_gpu;
-            l.delta_gpu = net.layers[count-1].delta_gpu;
+            l.output_gpu = net->layers[count-1].output_gpu;
+            l.delta_gpu = net->layers[count-1].delta_gpu;
 #endif
        }else{
            fprintf(stderr, "Type not recognized: %s\n", s->type);
@ -724,7 +727,7 @@ network parse_network_cfg(char *filename)
        l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
        l.smooth = option_find_float_quiet(options, "smooth", 0);
        option_unused(options);
-        net.layers[count] = l;
+        net->layers[count] = l;
        if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
        free_section(s);
        n = n->next;
@ -738,27 +741,27 @@ network parse_network_cfg(char *filename)
    }
    free_list(sections);
    layer out = get_network_output_layer(net);
-    net.outputs = out.outputs;
-    net.truths = out.outputs;
-    if(net.layers[net.n-1].truths) net.truths = net.layers[net.n-1].truths;
-    net.output = out.output;
-    net.input = calloc(net.inputs*net.batch, sizeof(float));
-    net.truth = calloc(net.truths*net.batch, sizeof(float));
+    net->outputs = out.outputs;
+    net->truths = out.outputs;
+    if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths;
+    net->output = out.output;
+    net->input = calloc(net->inputs*net->batch, sizeof(float));
+    net->truth = calloc(net->truths*net->batch, sizeof(float));
 #ifdef GPU
-    net.output_gpu = out.output_gpu;
-    net.input_gpu = cuda_make_array(net.input, net.inputs*net.batch);
-    net.truth_gpu = cuda_make_array(net.truth, net.truths*net.batch);
+    net->output_gpu = out.output_gpu;
+    net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch);
+    net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch);
 #endif
    if(workspace_size){
        //printf("%ld\n", workspace_size);
 #ifdef GPU
        if(gpu_index >= 0){
-            net.workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
+            net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
        }else {
-            net.workspace = calloc(1, workspace_size);
+            net->workspace = calloc(1, workspace_size);
        }
 #else
-        net.workspace = calloc(1, workspace_size);
+        net->workspace = calloc(1, workspace_size);
 #endif
    }
    return net;
@ -880,11 +883,11 @@ void save_connected_weights(layer l, FILE *fp)
    }
 }

-void save_weights_upto(network net, char *filename, int cutoff)
+void save_weights_upto(network *net, char *filename, int cutoff)
 {
 #ifdef GPU
-    if(net.gpu_index >= 0){
-        cuda_set_device(net.gpu_index);
+    if(net->gpu_index >= 0){
+        cuda_set_device(net->gpu_index);
    }
 #endif
    fprintf(stderr, "Saving weights to %s\n", filename);
@ -897,11 +900,11 @@ void save_weights_upto(network net, char *filename, int cutoff)
    fwrite(&major, sizeof(int), 1, fp);
    fwrite(&minor, sizeof(int), 1, fp);
    fwrite(&revision, sizeof(int), 1, fp);
-    fwrite(net.seen, sizeof(size_t), 1, fp);
+    fwrite(net->seen, sizeof(size_t), 1, fp);

    int i;
-    for(i = 0; i < net.n && i < cutoff; ++i){
-        layer l = net.layers[i];
+    for(i = 0; i < net->n && i < cutoff; ++i){
+        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
            save_convolutional_weights(l, fp);
        } if(l.type == CONNECTED){
@ -952,9 +955,9 @@ void save_weights_upto(network net, char *filename, int cutoff)
    }
    fclose(fp);
 }
-void save_weights(network net, char *filename)
+void save_weights(network *net, char *filename)
 {
-    save_weights_upto(net, filename, net.n);
+    save_weights_upto(net, filename, net->n);
 }

 void transpose_matrix(float *a, int rows, int cols)
--- a/src/region_layer.c
+++ b/src/region_layer.c
@ -109,7 +109,7 @@ void delta_region_mask(float *truth, float *x, int n, int index, float *delta, i
 }


-void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat)
+void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag)
 {
    int i, n;
    if(hier){
@ -127,7 +127,7 @@ void delta_region_class(float *output, float *delta, int index, int class, int c
        }
        *avg_cat += pred;
    } else {
-        if (delta[index]){
+        if (delta[index] && tag){
            delta[index + stride*class] = scale * (1 - output[index + stride*class]);
            return;
        }
@ -218,7 +218,7 @@ void forward_region_layer(const layer l, network net)
                    }
                    int class_index = entry_index(l, b, maxi, l.coords + 1);
                    int obj_index = entry_index(l, b, maxi, l.coords);
-                    delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
+                    delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax);
                    if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]);
                    else  l.delta[obj_index] = 0;
                    l.delta[obj_index] = 0;
@ -316,7 +316,7 @@ void forward_region_layer(const layer l, network net)
            int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords];
            if (l.map) class = l.map[class];
            int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1);
-            delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
+            delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax);
            ++count;
            ++class_count;
        }