From 90d354a2a5a3ba76071337d8794cfc00f7bc5fab Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Sat, 13 Dec 2014 12:01:21 -0800 Subject: [PATCH] fixed dropout >< --- src/cnn.c | 44 +++++++++++++++++++++++++------------------- src/data.c | 30 +++++++++++++++--------------- src/data.h | 7 ++++--- src/dropout_layer.c | 42 ++++++++++++++++++++++++++++++++++++------ src/dropout_layer.cl | 4 ++-- src/dropout_layer.h | 9 ++++++--- src/network.c | 4 ++++ src/network_gpu.c | 4 ++++ 8 files changed, 96 insertions(+), 48 deletions(-) diff --git a/src/cnn.c b/src/cnn.c index 7448ece6..43676c1b 100644 --- a/src/cnn.c +++ b/src/cnn.c @@ -294,7 +294,7 @@ void train_asirra() while(1){ i += 1; time=clock(); - data train = load_data_random(imgs*net.batch, paths, m, labels, 2, 256, 256); + data train = load_data(paths, imgs*net.batch, m, labels, 2, 256, 256); normalize_data_rows(train); printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); @@ -404,7 +404,7 @@ void train_imagenet_distributed(char *address) printf("%d\n", plist->size); clock_t time; data train, buffer; - pthread_t load_thread = load_data_random_thread(imgs*net.batch, paths, plist->size, labels, 1000, 224, 224, &buffer); + pthread_t load_thread = load_data_thread(paths, imgs*net.batch, plist->size, labels, 1000, 224, 224, &buffer); while(1){ i += 1; @@ -416,7 +416,7 @@ void train_imagenet_distributed(char *address) pthread_join(load_thread, 0); train = buffer; normalize_data_rows(train); - load_thread = load_data_random_thread(imgs*net.batch, paths, plist->size, labels, 1000, 224, 224, &buffer); + load_thread = load_data_thread(paths, imgs*net.batch, plist->size, labels, 1000, 224, 224, &buffer); printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); @@ -434,11 +434,10 @@ void train_imagenet() float avg_loss = 1; //network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg"); srand(time(0)); - network net = parse_network_cfg("cfg/net.cfg"); + network net = parse_network_cfg("cfg/net.part"); printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); int imgs = 1000/net.batch+1; - //imgs=1; - int i = 0; + int i = 9540; char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list"); list *plist = get_paths("/data/imagenet/cls.train.list"); char **paths = (char **)list_to_array(plist); @@ -447,14 +446,14 @@ void train_imagenet() pthread_t load_thread; data train; data buffer; - load_thread = load_data_random_thread(imgs*net.batch, paths, plist->size, labels, 1000, 224, 224, &buffer); + load_thread = load_data_thread(paths, imgs*net.batch, plist->size, labels, 1000, 224, 224, &buffer); while(1){ i += 1; time=clock(); pthread_join(load_thread, 0); train = buffer; normalize_data_rows(train); - load_thread = load_data_random_thread(imgs*net.batch, paths, plist->size, labels, 1000, 224, 224, &buffer); + load_thread = load_data_thread(paths, imgs*net.batch, plist->size, labels, 1000, 224, 224, &buffer); printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); #ifdef GPU @@ -465,7 +464,7 @@ void train_imagenet() free_data(train); if(i%10==0){ char buff[256]; - sprintf(buff, "/home/pjreddie/imagenet_backup/alexnet_%d.cfg", i); + sprintf(buff, "/home/pjreddie/imagenet_backup/net_%d.cfg", i); save_network(net, buff); } } @@ -473,7 +472,7 @@ void train_imagenet() void validate_imagenet(char *filename) { - int i; + int i = 0; network net = parse_network_cfg(filename); srand(time(0)); @@ -488,21 +487,28 @@ void validate_imagenet(char *filename) float avg_acc = 0; float avg_top5 = 0; int splits = 50; + int num = (i+1)*m/splits - i*m/splits; - for(i = 0; i < splits; ++i){ + data val, buffer; + pthread_t load_thread = load_data_thread(paths, num, 0, labels, 1000, 224, 224, &buffer); + for(i = 1; i <= splits; ++i){ time=clock(); - char **part = paths+(i*m/splits); - int num = (i+1)*m/splits - i*m/splits; - data val = load_data(part, num, labels, 1000, 224, 224); + pthread_join(load_thread, 0); + val = buffer; normalize_data_rows(val); + + num = (i+1)*m/splits - i*m/splits; + char **part = paths+(i*m/splits); + if(i != splits) load_thread = load_data_thread(part, num, 0, labels, 1000, 224, 224, &buffer); printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + time=clock(); #ifdef GPU float *acc = network_accuracies_gpu(net, val); avg_acc += acc[0]; avg_top5 += acc[1]; - printf("%d: top1: %f, top5: %f, %lf seconds, %d images\n", i, avg_acc/(i+1), avg_top5/(i+1), sec(clock()-time), val.X.rows); + printf("%d: top1: %f, top5: %f, %lf seconds, %d images\n", i, avg_acc/i, avg_top5/i, sec(clock()-time), val.X.rows); #endif free_data(val); } @@ -895,14 +901,14 @@ void test_correct_alexnet() int count = 0; srand(222222); - network net = parse_network_cfg("cfg/alexnet.test"); + network net = parse_network_cfg("cfg/net.cfg"); printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); int imgs = 1000/net.batch+1; imgs = 1; while(++count <= 5){ time=clock(); - data train = load_data_random(imgs*net.batch, paths, plist->size, labels, 1000, 256, 256); + data train = load_data(paths, imgs*net.batch, plist->size, labels, 1000, 224,224); //translate_data_rows(train, -144); normalize_data_rows(train); printf("Loaded: %lf seconds\n", sec(clock()-time)); @@ -914,10 +920,10 @@ void test_correct_alexnet() #ifdef GPU count = 0; srand(222222); - net = parse_network_cfg("cfg/alexnet.test"); + net = parse_network_cfg("cfg/net.cfg"); while(++count <= 5){ time=clock(); - data train = load_data_random(imgs*net.batch, paths, plist->size, labels, 1000, 256, 256); + data train = load_data(paths, imgs*net.batch, plist->size, labels, 1000, 224, 224); //translate_data_rows(train, -144); normalize_data_rows(train); printf("Loaded: %lf seconds\n", sec(clock()-time)); diff --git a/src/data.c b/src/data.c index 764f43c2..86e59efc 100644 --- a/src/data.c +++ b/src/data.c @@ -180,16 +180,7 @@ data load_data_detection_random(int n, char **paths, int m, int h, int w, int nh return d; } -data load_data(char **paths, int n, char **labels, int k, int h, int w) -{ - data d; - d.shallow = 0; - d.X = load_image_paths(paths, n, h, w); - d.y = load_labels_paths(paths, n, labels, k); - return d; -} - -data load_data_random(int n, char **paths, int m, char **labels, int k, int h, int w) +char **get_random_paths(char **paths, int n, int m) { char **random_paths = calloc(n, sizeof(char*)); int i; @@ -198,14 +189,23 @@ data load_data_random(int n, char **paths, int m, char **labels, int k, int h, i random_paths[i] = paths[index]; if(i == 0) printf("%s\n", paths[index]); } - data d = load_data(random_paths, n, labels, k, h, w); - free(random_paths); + return random_paths; +} + +data load_data(char **paths, int n, int m, char **labels, int k, int h, int w) +{ + if(m) paths = get_random_paths(paths, n, m); + data d; + d.shallow = 0; + d.X = load_image_paths(paths, n, h, w); + d.y = load_labels_paths(paths, n, labels, k); + if(m) free(paths); return d; } struct load_args{ - int n; char **paths; + int n; int m; char **labels; int k; @@ -217,11 +217,11 @@ struct load_args{ void *load_in_thread(void *ptr) { struct load_args a = *(struct load_args*)ptr; - *a.d = load_data_random(a.n, a.paths, a.m, a.labels, a.k, a.h, a.w); + *a.d = load_data(a.paths, a.n, a.m, a.labels, a.k, a.h, a.w); return 0; } -pthread_t load_data_random_thread(int n, char **paths, int m, char **labels, int k, int h, int w, data *d) +pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d) { pthread_t thread; struct load_args *args = calloc(1, sizeof(struct load_args)); diff --git a/src/data.h b/src/data.h index 38a5e153..1c0b732a 100644 --- a/src/data.h +++ b/src/data.h @@ -13,9 +13,10 @@ typedef struct{ void free_data(data d); -data load_data(char **paths, int n, char **labels, int k, int h, int w); -pthread_t load_data_random_thread(int n, char **paths, int m, char **labels, int k, int h, int w, data *d); -data load_data_random(int n, char **paths, int m, char **labels, int k, int h, int w); + +data load_data(char **paths, int n, int m, char **labels, int k, int h, int w); +pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d); + data load_data_detection_random(int n, char **paths, int m, int h, int w, int nh, int nw, float scale); data load_data_detection_jitter_random(int n, char **paths, int m, int h, int w, int nh, int nw, float scale); data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w); diff --git a/src/dropout_layer.c b/src/dropout_layer.c index ad13034a..d4616d52 100644 --- a/src/dropout_layer.c +++ b/src/dropout_layer.c @@ -10,8 +10,9 @@ dropout_layer *make_dropout_layer(int batch, int inputs, float probability) layer->probability = probability; layer->inputs = inputs; layer->batch = batch; - #ifdef GPU layer->rand = calloc(inputs*batch, sizeof(float)); + layer->scale = 1./(1.-probability); + #ifdef GPU layer->rand_cl = cl_make_array(layer->rand, inputs*batch); #endif return layer; @@ -21,13 +22,21 @@ void forward_dropout_layer(dropout_layer layer, float *input) { int i; for(i = 0; i < layer.batch * layer.inputs; ++i){ - if(rand_uniform() < layer.probability) input[i] = 0; - else input[i] /= (1-layer.probability); + float r = rand_uniform(); + layer.rand[i] = r; + if(r < layer.probability) input[i] = 0; + else input[i] *= layer.scale; } } -void backward_dropout_layer(dropout_layer layer, float *input, float *delta) + +void backward_dropout_layer(dropout_layer layer, float *delta) { - // Don't do shit LULZ + int i; + for(i = 0; i < layer.batch * layer.inputs; ++i){ + float r = layer.rand[i]; + if(r < layer.probability) delta[i] = 0; + else delta[i] *= layer.scale; + } } #ifdef GPU @@ -36,7 +45,7 @@ cl_kernel get_dropout_kernel() static int init = 0; static cl_kernel kernel; if(!init){ - kernel = get_kernel("src/dropout_layer.cl", "forward", 0); + kernel = get_kernel("src/dropout_layer.cl", "yoloswag420blazeit360noscope", 0); init = 1; } return kernel; @@ -56,6 +65,27 @@ void forward_dropout_layer_gpu(dropout_layer layer, cl_mem input) cl.error = clSetKernelArg(kernel, i++, sizeof(input), (void*) &input); cl.error = clSetKernelArg(kernel, i++, sizeof(layer.rand_cl), (void*) &layer.rand_cl); cl.error = clSetKernelArg(kernel, i++, sizeof(layer.probability), (void*) &layer.probability); + cl.error = clSetKernelArg(kernel, i++, sizeof(layer.scale), (void*) &layer.scale); + check_error(cl); + + const size_t global_size[] = {size}; + + cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); + check_error(cl); +} + +void backward_dropout_layer_gpu(dropout_layer layer, cl_mem delta) +{ + int size = layer.inputs*layer.batch; + + cl_kernel kernel = get_dropout_kernel(); + cl_command_queue queue = cl.queue; + + cl_uint i = 0; + cl.error = clSetKernelArg(kernel, i++, sizeof(delta), (void*) &delta); + cl.error = clSetKernelArg(kernel, i++, sizeof(layer.rand_cl), (void*) &layer.rand_cl); + cl.error = clSetKernelArg(kernel, i++, sizeof(layer.probability), (void*) &layer.probability); + cl.error = clSetKernelArg(kernel, i++, sizeof(layer.scale), (void*) &layer.scale); check_error(cl); const size_t global_size[] = {size}; diff --git a/src/dropout_layer.cl b/src/dropout_layer.cl index aa24964a..729dbc44 100644 --- a/src/dropout_layer.cl +++ b/src/dropout_layer.cl @@ -1,5 +1,5 @@ -__kernel void forward(__global float *input, __global float *rand, float prob) +__kernel void yoloswag420blazeit360noscope(__global float *input, __global float *rand, float prob, float scale) { int id = get_global_id(0); - input[id] = (rand[id] < prob) ? 0 : input[id]/(1.-prob); + input[id] = (rand[id] < prob) ? 0 : input[id]*scale; } diff --git a/src/dropout_layer.h b/src/dropout_layer.h index 46459aa7..0a6f034f 100644 --- a/src/dropout_layer.h +++ b/src/dropout_layer.h @@ -6,8 +6,9 @@ typedef struct{ int batch; int inputs; float probability; - #ifdef GPU + float scale; float *rand; + #ifdef GPU cl_mem rand_cl; #endif } dropout_layer; @@ -15,9 +16,11 @@ typedef struct{ dropout_layer *make_dropout_layer(int batch, int inputs, float probability); void forward_dropout_layer(dropout_layer layer, float *input); -void backward_dropout_layer(dropout_layer layer, float *input, float *delta); - #ifdef GPU +void backward_dropout_layer(dropout_layer layer, float *delta); + +#ifdef GPU void forward_dropout_layer_gpu(dropout_layer layer, cl_mem input); +void backward_dropout_layer_gpu(dropout_layer layer, cl_mem delta); #endif #endif diff --git a/src/network.c b/src/network.c index ae030cee..64a60321 100644 --- a/src/network.c +++ b/src/network.c @@ -219,6 +219,10 @@ void backward_network(network net, float *input) maxpool_layer layer = *(maxpool_layer *)net.layers[i]; if(i != 0) backward_maxpool_layer(layer, prev_delta); } + else if(net.types[i] == DROPOUT){ + dropout_layer layer = *(dropout_layer *)net.layers[i]; + backward_dropout_layer(layer, prev_delta); + } else if(net.types[i] == NORMALIZATION){ normalization_layer layer = *(normalization_layer *)net.layers[i]; if(i != 0) backward_normalization_layer(layer, prev_input, prev_delta); diff --git a/src/network_gpu.c b/src/network_gpu.c index 163d9147..d09aa714 100644 --- a/src/network_gpu.c +++ b/src/network_gpu.c @@ -101,6 +101,10 @@ void backward_network_gpu(network net, cl_mem input) maxpool_layer layer = *(maxpool_layer *)net.layers[i]; backward_maxpool_layer_gpu(layer, prev_delta); } + else if(net.types[i] == DROPOUT){ + dropout_layer layer = *(dropout_layer *)net.layers[i]; + backward_dropout_layer_gpu(layer, prev_delta); + } else if(net.types[i] == SOFTMAX){ softmax_layer layer = *(softmax_layer *)net.layers[i]; backward_softmax_layer_gpu(layer, prev_delta);