diff --git a/Makefile b/Makefile index 116d3bc7..65264de2 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -GPU=0 -OPENCV=0 +GPU=1 +OPENCV=1 DEBUG=0 ARCH= --gpu-architecture=compute_20 --gpu-code=compute_20 @@ -34,7 +34,7 @@ CFLAGS+= -DGPU LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand endif -OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o +OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o compare.o ifeq ($(GPU), 1) OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o endif diff --git a/cfg/darknet.cfg b/cfg/darknet.cfg index 2e0a6247..f52ff3f8 100644 --- a/cfg/darknet.cfg +++ b/cfg/darknet.cfg @@ -1,12 +1,15 @@ [net] batch=128 -subdivisions=32 +subdivisions=1 height=256 width=256 channels=3 -learning_rate=0.01 momentum=0.9 decay=0.0005 +learning_rate=0.01 +policy=poly +power=.5 +max_batches=600000 [crop] crop_height=224 @@ -24,8 +27,8 @@ pad=1 activation=leaky [maxpool] +size=3 stride=2 -size=2 [convolutional] filters=32 @@ -35,8 +38,8 @@ pad=1 activation=leaky [maxpool] +size=3 stride=2 -size=2 [convolutional] filters=64 @@ -46,8 +49,8 @@ pad=1 activation=leaky [maxpool] +size=3 stride=2 -size=2 [convolutional] filters=128 @@ -57,8 +60,8 @@ pad=1 activation=leaky [maxpool] +size=3 stride=2 -size=2 [convolutional] filters=256 @@ -68,8 +71,8 @@ pad=1 activation=leaky [maxpool] +size=3 stride=2 -size=2 [convolutional] filters=512 @@ -79,8 +82,8 @@ pad=1 activation=leaky [maxpool] +size=3 stride=2 -size=2 [convolutional] filters=1024 @@ -96,7 +99,7 @@ probability=.5 [connected] output=1000 -activation=leaky +activation=linear [softmax] diff --git a/src/captcha.c b/src/captcha.c index 68d8915f..4e77ce26 100644 --- a/src/captcha.c +++ b/src/captcha.c @@ -38,9 +38,8 @@ void train_captcha(char *cfgfile, char *weightfile) load_weights(&net, weightfile); } printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - //net.seen=0; int imgs = 1024; - int i = net.seen/imgs; + int i = *net.seen/imgs; int solved = 1; list *plist; char **labels = get_labels("/data/captcha/reimgs.labels.list"); @@ -85,10 +84,9 @@ void train_captcha(char *cfgfile, char *weightfile) printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); float loss = train_network(net, train); - net.seen += imgs; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100==0){ char buff[256]; diff --git a/src/coco.c b/src/coco.c index 62ae4292..87f3dcaa 100644 --- a/src/coco.c +++ b/src/coco.c @@ -62,7 +62,7 @@ void train_coco(char *cfgfile, char *weightfile) } printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); int imgs = 128; - int i = net.seen/imgs; + int i = *net.seen/imgs; data train, buffer; @@ -107,7 +107,6 @@ void train_coco(char *cfgfile, char *weightfile) time=clock(); float loss = train_network(net, train); - net.seen += imgs; if (avg_loss < 0) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; @@ -253,8 +252,9 @@ void validate_recall(char *cfgfile, char *weightfile) int total = 0; int correct = 0; float avg_iou = 0; - int nms = 0; + int nms = 1; int proposals = 0; + int save = 1; for (i = 0; i < N; ++i) { char *path = paths[i]; @@ -277,6 +277,27 @@ void validate_recall(char *cfgfile, char *weightfile) for(k = 0; k < num_boxes*num_boxes*num; ++k){ if(probs[k][0] > thresh){ ++proposals; + if(save){ + char buff[256]; + sprintf(buff, "/data/extracted/nms_preds/%d", proposals); + int dx = (boxes[k].x - boxes[k].w/2) * orig.w; + int dy = (boxes[k].y - boxes[k].h/2) * orig.h; + int w = boxes[k].w * orig.w; + int h = boxes[k].h * orig.h; + image cropped = crop_image(orig, dx, dy, w, h); + image sized = resize_image(cropped, 224, 224); +#ifdef OPENCV + save_image_jpg(sized, buff); +#endif + free_image(sized); + free_image(cropped); + sprintf(buff, "/data/extracted/nms_pred_boxes/%d.txt", proposals); + char *im_id = basecfg(path); + FILE *fp = fopen(buff, "w"); + fprintf(fp, "%s %d %d %d %d\n", im_id, dx, dy, dx+w, dy+h); + fclose(fp); + free(im_id); + } } } for (j = 0; j < num_labels; ++j) { @@ -332,7 +353,7 @@ void extract_boxes(char *cfgfile, char *weightfile) int k; int count = 0; - float iou_thresh = .1; + float iou_thresh = .3; for (i = 0; i < N; ++i) { fprintf(stderr, "%5d %5d\n", i, count); @@ -361,7 +382,7 @@ void extract_boxes(char *cfgfile, char *weightfile) if (iou > iou_thresh){ if (!overlaps) { char buff[256]; - sprintf(buff, "/home/pjreddie/extracted/labels/%d.txt", count); + sprintf(buff, "/data/extracted/labels/%d.txt", count); label = fopen(buff, "w"); overlaps = 1; } @@ -370,16 +391,16 @@ void extract_boxes(char *cfgfile, char *weightfile) } if (overlaps) { char buff[256]; - sprintf(buff, "/home/pjreddie/extracted/imgs/%d", count++); + sprintf(buff, "/data/extracted/imgs/%d", count++); int dx = (boxes[k].x - boxes[k].w/2) * orig.w; int dy = (boxes[k].y - boxes[k].h/2) * orig.h; int w = boxes[k].w * orig.w; int h = boxes[k].h * orig.h; image cropped = crop_image(orig, dx, dy, w, h); image sized = resize_image(cropped, 224, 224); - #ifdef OPENCV +#ifdef OPENCV save_image_jpg(sized, buff); - #endif +#endif free_image(sized); free_image(cropped); fclose(label); diff --git a/src/compare.c b/src/compare.c new file mode 100644 index 00000000..9b6d6bf2 --- /dev/null +++ b/src/compare.c @@ -0,0 +1,303 @@ +#include + +#include "network.h" +#include "detection_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" + +void train_compare(char *cfgfile, char *weightfile) +{ + data_seed = time(0); + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + list *plist = get_paths("data/compare.train.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + printf("%d\n", N); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = COMPARE_DATA; + + load_thread = load_data_in_thread(args); + int epoch = *net.seen/N; + int i = 0; + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%.3f: %f, %f avg, %lf seconds, %d images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if(i%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + i = 0; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; + } + } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_compare(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *plist = get_paths("data/compare.val.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size/2; + free_list(plist); + + clock_t time; + int correct = 0; + int total = 0; + int splits = 10; + int num = (i+1)*N/splits - i*N/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = num; + args.m = 0; + args.d = &buffer; + args.type = COMPARE_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*N/splits - i*N/splits; + char **part = paths+(i*N/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + int j,k; + for(j = 0; j < val.y.rows; ++j){ + for(k = 0; k < 20; ++k){ + if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){ + ++total; + if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ + ++correct; + } + } + } + } + free_matrix(pred); + printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +typedef struct { + network net; + char *filename; + int class; + float elo; +} sortable_bbox; + +int total_compares = 0; + +int elo_comparator(const void*a, const void *b) +{ + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + if(box1.elo == box2.elo) return 0; + if(box1.elo > box2.elo) return -1; + return 1; +} + +int bbox_comparator(const void *a, const void *b) +{ + ++total_compares; + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + network net = box1.net; + int class = box1.class; + + image im1 = load_image_color(box1.filename, net.w, net.h); + image im2 = load_image_color(box2.filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c); + float *predictions = network_predict(net, X); + + free_image(im1); + free_image(im2); + free(X); + if (predictions[class*2] > predictions[class*2+1]){ + return 1; + } + return -1; +} + +void bbox_fight(sortable_bbox *a, sortable_bbox *b) +{ + int k = 32; + int result = bbox_comparator(a,b); + float EA = 1./(1+pow(10, (b->elo - a->elo)/400.)); + float EB = 1./(1+pow(10, (a->elo - b->elo)/400.)); + float SA = 1.*(result > 0); + float SB = 1.*(result < 0); + a->elo = a->elo + k*(SA - EA); + b->elo = b->elo + k*(SB - EB); +} + +void SortMaster3000(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Sorting %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + clock_t time=clock(); + qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); + for(i = 0; i < N; ++i){ + printf("%s\n", boxes[i].filename); + } + printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void BattleRoyaleWithCheese(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Battling %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + int round; + clock_t time=clock(); + for(round = 1; round <= 40; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); + for(i = 0; i < N/2; ++i){ + bbox_fight(boxes+i*2, boxes+i*2+1); + } + if(round >= 4){ + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + if(round == 4){ + N = N/2; + }else{ + N = (N*9/10)/2*2; + } + } + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + for(i = 0; i < N; ++i){ + printf("%s %f\n", boxes[i].filename, boxes[i].elo); + } + printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void run_compare(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); + else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); + else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); + /* + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); + */ +} diff --git a/src/cost_layer.c b/src/cost_layer.c index d1ae6e5b..4ec0ac4a 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -26,12 +26,13 @@ char *get_cost_string(COST_TYPE a) return "sse"; } -cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type) +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) { fprintf(stderr, "Cost Layer: %d inputs\n", inputs); cost_layer l = {0}; l.type = COST; + l.scale = scale; l.batch = batch; l.inputs = inputs; l.outputs = inputs; @@ -61,7 +62,7 @@ void forward_cost_layer(cost_layer l, network_state state) void backward_cost_layer(const cost_layer l, network_state state) { - axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); + axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1); } #ifdef GPU @@ -92,7 +93,7 @@ void forward_cost_layer_gpu(cost_layer l, network_state state) void backward_cost_layer_gpu(const cost_layer l, network_state state) { - axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1); + axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1); } #endif diff --git a/src/cost_layer.h b/src/cost_layer.h index 07323239..9ad3124d 100644 --- a/src/cost_layer.h +++ b/src/cost_layer.h @@ -7,7 +7,7 @@ typedef layer cost_layer; COST_TYPE get_cost_type(char *s); char *get_cost_string(COST_TYPE a); -cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type); +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); void forward_cost_layer(const cost_layer l, network_state state); void backward_cost_layer(const cost_layer l, network_state state); diff --git a/src/darknet.c b/src/darknet.c index f87afc60..3709ed1e 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -18,6 +18,7 @@ extern void run_writing(int argc, char **argv); extern void run_captcha(int argc, char **argv); extern void run_nightmare(int argc, char **argv); extern void run_dice(int argc, char **argv); +extern void run_compare(int argc, char **argv); void change_rate(char *filename, float scale, float add) { @@ -86,7 +87,7 @@ void partial(char *cfgfile, char *weightfile, char *outfile, int max) if(weightfile){ load_weights_upto(&net, weightfile, max); } - net.seen = 0; + *net.seen = 0; save_weights_upto(net, outfile, max); } @@ -179,6 +180,8 @@ int main(int argc, char **argv) run_yolo(argc, argv); } else if (0 == strcmp(argv[1], "coco")){ run_coco(argc, argv); + } else if (0 == strcmp(argv[1], "compare")){ + run_compare(argc, argv); } else if (0 == strcmp(argv[1], "dice")){ run_dice(argc, argv); } else if (0 == strcmp(argv[1], "writing")){ diff --git a/src/data.c b/src/data.c index ec2b3046..003338e6 100644 --- a/src/data.c +++ b/src/data.c @@ -413,8 +413,8 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl data load_data_compare(int n, char **paths, int m, int classes, int w, int h) { - char **random_paths = get_random_paths(paths, 2*n, m); - int i; + if(m) paths = get_random_paths(paths, 2*n, m); + int i,j; data d; d.shallow = 0; @@ -425,20 +425,51 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h) int k = 2*(classes); d.y = make_matrix(n, k); for(i = 0; i < n; ++i){ - image im1 = load_image_color(random_paths[i*2], w, h); - image im2 = load_image_color(random_paths[i*2+1], w, h); + image im1 = load_image_color(paths[i*2], w, h); + image im2 = load_image_color(paths[i*2+1], w, h); d.X.vals[i] = calloc(d.X.cols, sizeof(float)); memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); - //char *imlabel1 = find_replace(random_paths[i*2], "imgs", "labels"); - //char *imlabel2 = find_replace(random_paths[i*2+1], "imgs", "labels"); + int id; + float iou; + + char *imlabel1 = find_replace(paths[i*2], "imgs", "labels"); + imlabel1 = find_replace(imlabel1, "jpg", "txt"); + FILE *fp1 = fopen(imlabel1, "r"); + + while(fscanf(fp1, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; + } + + char *imlabel2 = find_replace(paths[i*2+1], "imgs", "labels"); + imlabel2 = find_replace(imlabel2, "jpg", "txt"); + FILE *fp2 = fopen(imlabel2, "r"); + + while(fscanf(fp2, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; + } + + for (j = 0; j < classes; ++j){ + if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ + d.y.vals[i][2*j] = 1; + d.y.vals[i][2*j+1] = 0; + } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ + d.y.vals[i][2*j] = 0; + d.y.vals[i][2*j+1] = 1; + } else { + d.y.vals[i][2*j] = SECRET_NUM; + d.y.vals[i][2*j+1] = SECRET_NUM; + } + } + fclose(fp1); + fclose(fp2); free_image(im1); free_image(im2); } - free(random_paths); + if(m) free(paths); return d; } @@ -503,11 +534,11 @@ data load_data_detection(int n, char **paths, int m, int classes, int w, int h, void *load_thread(void *ptr) { - - #ifdef GPU - cudaError_t status = cudaSetDevice(gpu_index); - check_error(status); - #endif + +#ifdef GPU + cudaError_t status = cudaSetDevice(gpu_index); + check_error(status); +#endif printf("Loading data: %d\n", rand_r(&data_seed)); load_args a = *(struct load_args*)ptr; @@ -517,6 +548,8 @@ void *load_thread(void *ptr) *a.d = load_data_detection(a.n, a.paths, a.m, a.classes, a.w, a.h, a.num_boxes, a.background); } else if (a.type == REGION_DATA){ *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes); + } else if (a.type == COMPARE_DATA){ + *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); } else if (a.type == IMAGE_DATA){ *(a.im) = load_image_color(a.path, 0, 0); *(a.resized) = resize_image(*(a.im), a.w, a.h); diff --git a/src/data.h b/src/data.h index 7c425ba5..216ab0c4 100644 --- a/src/data.h +++ b/src/data.h @@ -26,7 +26,7 @@ typedef struct{ } data; typedef enum { - CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA + CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA } data_type; typedef struct load_args{ diff --git a/src/dice.c b/src/dice.c index 3283fe95..7948741a 100644 --- a/src/dice.c +++ b/src/dice.c @@ -18,7 +18,7 @@ void train_dice(char *cfgfile, char *weightfile) } printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); int imgs = 1024; - int i = net.seen/imgs; + int i = *net.seen/imgs; char **labels = dice_labels; list *plist = get_paths("data/dice/dice.train.list"); char **paths = (char **)list_to_array(plist); @@ -32,10 +32,9 @@ void train_dice(char *cfgfile, char *weightfile) time=clock(); float loss = train_network(net, train); - net.seen += imgs; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if((i % 100) == 0) net.learning_rate *= .1; if(i%100==0){ diff --git a/src/image.c b/src/image.c index fa0bcebc..b6d75778 100644 --- a/src/image.c +++ b/src/image.c @@ -274,6 +274,8 @@ void show_image_cv(image p, char *name) #ifdef OPENCV void save_image_jpg(image p, char *name) { + image copy = copy_image(p); + rgbgr_image(copy); int x,y,k; char buff[256]; @@ -284,12 +286,13 @@ void show_image_cv(image p, char *name) for(y = 0; y < p.h; ++y){ for(x = 0; x < p.w; ++x){ for(k= 0; k < p.c; ++k){ - disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(p,x,y,k)*255); + disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255); } } } cvSaveImage(buff, disp,0); cvReleaseImage(&disp); + free_image(copy); } #endif diff --git a/src/imagenet.c b/src/imagenet.c index 5d794835..c826a0f4 100644 --- a/src/imagenet.c +++ b/src/imagenet.c @@ -19,7 +19,6 @@ void train_imagenet(char *cfgfile, char *weightfile) load_weights(&net, weightfile); } printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - //net.seen=0; int imgs = 1024; char **labels = get_labels("data/inet.labels.list"); list *plist = get_paths("/data/imagenet/cls.train.list"); @@ -43,8 +42,8 @@ void train_imagenet(char *cfgfile, char *weightfile) args.type = CLASSIFICATION_DATA; load_thread = load_data_in_thread(args); - int epoch = net.seen/N; - while(1){ + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ time=clock(); pthread_join(load_thread, 0); train = buffer; @@ -59,19 +58,21 @@ void train_imagenet(char *cfgfile, char *weightfile) printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); float loss = train_network(net, train); - net.seen += imgs; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%.3f: %f, %f avg, %lf seconds, %d images\n", (float)net.seen/N, loss, avg_loss, sec(clock()-time), net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); - if(net.seen/N > epoch){ - epoch = net.seen/N; + if(*net.seen/N > epoch){ + epoch = *net.seen/N; char buff[256]; sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); save_weights(net, buff); - if(epoch%22 == 0) net.learning_rate *= .1; } } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); free_data(buffer); free_network(net); diff --git a/src/network.c b/src/network.c index 70bcb584..d823c157 100644 --- a/src/network.c +++ b/src/network.c @@ -20,6 +20,30 @@ #include "dropout_layer.h" #include "route_layer.h" +int get_current_batch(network net) +{ + int batch_num = (*net.seen)/(net.batch*net.subdivisions); + return batch_num; +} + +float get_current_rate(network net) +{ + int batch_num = get_current_batch(net); + switch (net.policy) { + case CONSTANT: + return net.learning_rate; + case STEP: + return net.learning_rate * pow(net.gamma, batch_num/net.step); + case EXP: + return net.learning_rate * pow(net.gamma, batch_num); + case POLY: + return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power); + default: + fprintf(stderr, "Policy is weird!\n"); + return net.learning_rate; + } +} + char *get_layer_string(LAYER_TYPE a) { switch(a){ @@ -60,6 +84,7 @@ network make_network(int n) network net = {0}; net.n = n; net.layers = calloc(net.n, sizeof(layer)); + net.seen = calloc(1, sizeof(int)); #ifdef GPU net.input_gpu = calloc(1, sizeof(float *)); net.truth_gpu = calloc(1, sizeof(float *)); @@ -110,14 +135,15 @@ void update_network(network net) { int i; int update_batch = net.batch*net.subdivisions; + float rate = get_current_rate(net); for(i = 0; i < net.n; ++i){ layer l = net.layers[i]; if(l.type == CONVOLUTIONAL){ - update_convolutional_layer(l, update_batch, net.learning_rate, net.momentum, net.decay); + update_convolutional_layer(l, update_batch, rate, net.momentum, net.decay); } else if(l.type == DECONVOLUTIONAL){ - update_deconvolutional_layer(l, net.learning_rate, net.momentum, net.decay); + update_deconvolutional_layer(l, rate, net.momentum, net.decay); } else if(l.type == CONNECTED){ - update_connected_layer(l, update_batch, net.learning_rate, net.momentum, net.decay); + update_connected_layer(l, update_batch, rate, net.momentum, net.decay); } } } @@ -203,6 +229,7 @@ void backward_network(network net, network_state state) float train_network_datum(network net, float *x, float *y) { + *net.seen += net.batch; #ifdef GPU if(gpu_index >= 0) return train_network_datum_gpu(net, x, y); #endif @@ -214,7 +241,7 @@ float train_network_datum(network net, float *x, float *y) forward_network(net, state); backward_network(net, state); float error = get_network_cost(net); - if((net.seen/net.batch)%net.subdivisions == 0) update_network(net); + if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net); return error; } @@ -227,7 +254,6 @@ float train_network_sgd(network net, data d, int n) int i; float sum = 0; for(i = 0; i < n; ++i){ - net.seen += batch; get_random_batch(d, batch, X, y); float err = train_network_datum(net, X, y); sum += err; @@ -248,7 +274,6 @@ float train_network(network net, data d) float sum = 0; for(i = 0; i < n; ++i){ get_next_batch(d, batch, i*batch, X, y); - net.seen += batch; float err = train_network_datum(net, X, y); sum += err; } diff --git a/src/network.h b/src/network.h index 1d960c07..85e5dbc9 100644 --- a/src/network.h +++ b/src/network.h @@ -7,17 +7,27 @@ #include "layer.h" #include "data.h" +typedef enum { + CONSTANT, STEP, EXP, POLY +} learning_rate_policy; + typedef struct { int n; int batch; - int seen; + int *seen; int subdivisions; - float learning_rate; float momentum; float decay; layer *layers; int outputs; float *output; + learning_rate_policy policy; + + float learning_rate; + float gamma; + float power; + int step; + int max_batches; int inputs; int h, w, c; @@ -38,6 +48,8 @@ void forward_network_gpu(network net, network_state state); void backward_network_gpu(network net, network_state state); #endif +float get_current_rate(network net); +int get_current_batch(network net); void free_network(network net); void compare_networks(network n1, network n2, data d); char *get_layer_string(LAYER_TYPE a); diff --git a/src/network_kernels.cu b/src/network_kernels.cu index a73ddd9c..1f0a6546 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -116,14 +116,15 @@ void update_network_gpu(network net) { int i; int update_batch = net.batch*net.subdivisions; + float rate = get_current_rate(net); for(i = 0; i < net.n; ++i){ layer l = net.layers[i]; if(l.type == CONVOLUTIONAL){ - update_convolutional_layer_gpu(l, update_batch, net.learning_rate, net.momentum, net.decay); + update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay); } else if(l.type == DECONVOLUTIONAL){ - update_deconvolutional_layer_gpu(l, net.learning_rate, net.momentum, net.decay); + update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay); } else if(l.type == CONNECTED){ - update_connected_layer_gpu(l, update_batch, net.learning_rate, net.momentum, net.decay); + update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay); } } } @@ -147,7 +148,7 @@ float train_network_datum_gpu(network net, float *x, float *y) forward_network_gpu(net, state); backward_network_gpu(net, state); float error = get_network_cost(net); - if ((net.seen / net.batch) % net.subdivisions == 0) update_network_gpu(net); + if (((*net.seen) / net.batch) % net.subdivisions == 0) update_network_gpu(net); return error; } diff --git a/src/parser.c b/src/parser.c index ad324e94..b9f6cb63 100644 --- a/src/parser.c +++ b/src/parser.c @@ -189,7 +189,8 @@ cost_layer parse_cost(list *options, size_params params) { char *type_s = option_find_str(options, "type", "sse"); COST_TYPE type = get_cost_type(type_s); - cost_layer layer = make_cost_layer(params.batch, params.inputs, type); + float scale = option_find_float_quiet(options, "scale",1); + cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale); return layer; } @@ -305,6 +306,16 @@ route_layer parse_route(list *options, size_params params, network net) return layer; } +learning_rate_policy get_policy(char *s) +{ + if (strcmp(s, "poly")==0) return POLY; + if (strcmp(s, "constant")==0) return CONSTANT; + if (strcmp(s, "step")==0) return STEP; + if (strcmp(s, "exp")==0) return EXP; + fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); + return CONSTANT; +} + void parse_net_options(list *options, network *net) { net->batch = option_find_int(options, "batch",1); @@ -319,7 +330,20 @@ void parse_net_options(list *options, network *net) net->w = option_find_int_quiet(options, "width",0); net->c = option_find_int_quiet(options, "channels",0); net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); + + char *policy_s = option_find_str(options, "policy", "constant"); + net->policy = get_policy(policy_s); + if(net->policy == STEP){ + net->step = option_find_int(options, "step", 1); + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == EXP){ + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == POLY){ + net->power = option_find_float(options, "power", 1); + } + net->max_batches = option_find_int(options, "max_batches", 0); } network parse_network_cfg(char *filename) @@ -532,7 +556,7 @@ void save_weights_double(network net, char *filename) fwrite(&net.learning_rate, sizeof(float), 1, fp); fwrite(&net.momentum, sizeof(float), 1, fp); fwrite(&net.decay, sizeof(float), 1, fp); - fwrite(&net.seen, sizeof(int), 1, fp); + fwrite(net.seen, sizeof(int), 1, fp); int i,j,k; for(i = 0; i < net.n; ++i){ @@ -571,7 +595,7 @@ void save_weights_upto(network net, char *filename, int cutoff) fwrite(&net.learning_rate, sizeof(float), 1, fp); fwrite(&net.momentum, sizeof(float), 1, fp); fwrite(&net.decay, sizeof(float), 1, fp); - fwrite(&net.seen, sizeof(int), 1, fp); + fwrite(net.seen, sizeof(int), 1, fp); int i; for(i = 0; i < net.n && i < cutoff; ++i){ @@ -620,10 +644,11 @@ void load_weights_upto(network *net, char *filename, int cutoff) FILE *fp = fopen(filename, "r"); if(!fp) file_error(filename); - fread(&net->learning_rate, sizeof(float), 1, fp); - fread(&net->momentum, sizeof(float), 1, fp); - fread(&net->decay, sizeof(float), 1, fp); - fread(&net->seen, sizeof(int), 1, fp); + float garbage; + fread(&garbage, sizeof(float), 1, fp); + fread(&garbage, sizeof(float), 1, fp); + fread(&garbage, sizeof(float), 1, fp); + fread(net->seen, sizeof(int), 1, fp); int i; for(i = 0; i < net->n && i < cutoff; ++i){ diff --git a/src/utils.c b/src/utils.c index d54e9665..3121ef6f 100644 --- a/src/utils.c +++ b/src/utils.c @@ -8,6 +8,29 @@ #include "utils.h" +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) +{ + size_t i; + for(i = 0; i < sections; ++i){ + size_t start = n*i/sections; + size_t end = n*(i+1)/sections; + size_t num = end-start; + shuffle(arr+(start*size), num, size); + } +} + +void shuffle(void *arr, size_t n, size_t size) +{ + size_t i; + void *swp = calloc(1, size); + for(i = 0; i < n-1; ++i){ + size_t j = i + rand()/(RAND_MAX / (n-i)+1); + memcpy(swp, arr+(j*size), size); + memcpy(arr+(j*size), arr+(i*size), size); + memcpy(arr+(i*size), swp, size); + } +} + void del_arg(int argc, char **argv, int index) { int i; diff --git a/src/utils.h b/src/utils.h index 93327022..1b9ba08c 100644 --- a/src/utils.h +++ b/src/utils.h @@ -6,6 +6,8 @@ #define SECRET_NUM -1234 +void shuffle(void *arr, size_t n, size_t size); +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); void free_ptrs(void **ptrs, int n); char *basecfg(char *cfgfile); int alphanum_to_int(char c); diff --git a/src/writing.c b/src/writing.c index 1c1684bc..cfbc5fd1 100644 --- a/src/writing.c +++ b/src/writing.c @@ -15,7 +15,7 @@ void train_writing(char *cfgfile, char *weightfile) } printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); int imgs = 1024; - int i = net.seen/imgs; + int i = *net.seen/imgs; list *plist = get_paths("figures.list"); char **paths = (char **)list_to_array(plist); printf("%d\n", plist->size); @@ -44,10 +44,9 @@ void train_writing(char *cfgfile, char *weightfile) cvWaitKey(0); */ - net.seen += imgs; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if((i % 20000) == 0) net.learning_rate *= .1; //if(i%100 == 0 && net.learning_rate > .00001) net.learning_rate *= .97; diff --git a/src/yolo.c b/src/yolo.c index 9bf96dea..61a5344e 100644 --- a/src/yolo.c +++ b/src/yolo.c @@ -68,7 +68,7 @@ void train_yolo(char *cfgfile, char *weightfile) detection_layer layer = get_network_detection_layer(net); printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); int imgs = 128; - int i = net.seen/imgs; + int i = *net.seen/imgs; char **paths; list *plist = get_paths(train_images); @@ -112,7 +112,6 @@ void train_yolo(char *cfgfile, char *weightfile) printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); float loss = train_network(net, train); - net.seen += imgs; if (avg_loss < 0) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1;