From 481b57a96a9ef29b112caec1bb3e17ffb043ceae Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Sat, 24 Sep 2016 23:12:54 -0700 Subject: [PATCH] So I have this new programming paradigm....... --- Makefile | 4 +- data/labels/make_labels.py | 17 +- src/activation_layer.c | 5 + src/art.c | 1 + src/avgpool_layer.c | 4 + src/batchnorm_layer.c | 6 + src/classifier.c | 14 +- src/coco.c | 40 ++--- src/connected_layer.c | 8 + src/convolutional_layer.c | 7 + src/cost_layer.c | 6 + src/crnn_layer.c | 8 + src/crop_layer.c | 8 + src/darknet.c | 13 -- src/data.c | 60 ++++--- src/deconvolutional_layer.c | 4 + src/demo.c | 47 +++--- src/demo.h | 2 +- src/detection_layer.c | 34 ++++ src/detection_layer.h | 1 + src/detector.c | 121 +++++--------- src/dropout_layer.c | 4 + src/gru_layer.c | 8 + src/gru_layer.h | 23 ++- src/image.c | 190 +++++++++++++++------- src/image.h | 13 +- src/layer.h | 8 + src/local_layer.c | 8 + src/maxpool_layer.c | 4 + src/network.c | 113 ++----------- src/network_kernels.cu | 122 +------------- src/normalization_layer.c | 6 + src/parser.c | 311 +++++++----------------------------- src/region_layer.c | 43 +++++ src/region_layer.h | 1 + src/reorg_layer.c | 6 + src/rnn_layer.c | 6 + src/rnn_layer.h | 23 +-- src/rnn_vid.c | 2 + src/route_layer.c | 22 ++- src/route_layer.h | 8 +- src/shortcut_layer.c | 6 + src/softmax_layer.c | 6 + src/utils.c | 21 ++- src/utils.h | 2 +- src/voxel.c | 1 + src/xnor_layer.c | 86 ---------- src/xnor_layer.h | 11 -- src/yolo.c | 79 +++------ 49 files changed, 629 insertions(+), 914 deletions(-) delete mode 100644 src/xnor_layer.c delete mode 100644 src/xnor_layer.h diff --git a/Makefile b/Makefile index b36b6b85..0a48e550 100644 --- a/Makefile +++ b/Makefile @@ -41,10 +41,10 @@ CFLAGS+= -DCUDNN LDFLAGS+= -lcudnn endif -OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o +OBJ=gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ -OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o +OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) diff --git a/data/labels/make_labels.py b/data/labels/make_labels.py index bdd2421f..1dacdc37 100644 --- a/data/labels/make_labels.py +++ b/data/labels/make_labels.py @@ -1,6 +1,19 @@ import os +import string +import pipes -l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] +#l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + +l = string.printable for word in l: - os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word)) + #os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word)) + if word == ' ': + os.system('convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:"\ " 32.png') + elif word == '\\': + os.system('convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:"\\\\\\\\" 92.png') + elif ord(word) in [9,10,11,12,13,14]: + pass + else: + os.system("convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:%s \"%d.png\""%(pipes.quote(word), ord(word))) + diff --git a/src/activation_layer.c b/src/activation_layer.c index 49e638d4..3430dac4 100644 --- a/src/activation_layer.c +++ b/src/activation_layer.c @@ -21,7 +21,12 @@ layer make_activation_layer(int batch, int inputs, ACTIVATION activation) l.output = calloc(batch*inputs, sizeof(float*)); l.delta = calloc(batch*inputs, sizeof(float*)); + l.forward = forward_activation_layer; + l.backward = backward_activation_layer; #ifdef GPU + l.forward_gpu = forward_activation_layer_gpu; + l.backward_gpu = backward_activation_layer_gpu; + l.output_gpu = cuda_make_array(l.output, inputs*batch); l.delta_gpu = cuda_make_array(l.delta, inputs*batch); #endif diff --git a/src/art.c b/src/art.c index 9a0559e5..71d37192 100644 --- a/src/art.c +++ b/src/art.c @@ -8,6 +8,7 @@ #ifdef OPENCV #include "opencv2/highgui/highgui_c.h" +image get_image_from_stream(CvCapture *cap); #endif diff --git a/src/avgpool_layer.c b/src/avgpool_layer.c index 0feae710..c6db477e 100644 --- a/src/avgpool_layer.c +++ b/src/avgpool_layer.c @@ -19,7 +19,11 @@ avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) int output_size = l.outputs * batch; l.output = calloc(output_size, sizeof(float)); l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_avgpool_layer; + l.backward = backward_avgpool_layer; #ifdef GPU + l.forward_gpu = forward_avgpool_layer_gpu; + l.backward_gpu = backward_avgpool_layer_gpu; l.output_gpu = cuda_make_array(l.output, output_size); l.delta_gpu = cuda_make_array(l.delta, output_size); #endif diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c index 9b68277e..510f1b2f 100644 --- a/src/batchnorm_layer.c +++ b/src/batchnorm_layer.c @@ -28,7 +28,13 @@ layer make_batchnorm_layer(int batch, int w, int h, int c) layer.rolling_mean = calloc(c, sizeof(float)); layer.rolling_variance = calloc(c, sizeof(float)); + + layer.forward = forward_batchnorm_layer; + layer.backward = backward_batchnorm_layer; #ifdef GPU + layer.forward_gpu = forward_batchnorm_layer_gpu; + layer.backward_gpu = backward_batchnorm_layer_gpu; + layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); diff --git a/src/classifier.c b/src/classifier.c index b42d0108..208b7ed4 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -10,6 +10,7 @@ #ifdef OPENCV #include "opencv2/highgui/highgui_c.h" +image get_image_from_stream(CvCapture *cap); #endif list *read_data_cfg(char *filename) @@ -57,25 +58,26 @@ void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int #ifdef GPU int i; - srand(time(0)); float avg_loss = -1; char *base = basecfg(cfgfile); printf("%s\n", base); printf("%d\n", ngpus); network *nets = calloc(ngpus, sizeof(network)); + + srand(time(0)); + int seed = rand(); for(i = 0; i < ngpus; ++i){ + srand(seed); cuda_set_device(gpus[i]); nets[i] = parse_network_cfg(cfgfile); - if(clear) *nets[i].seen = 0; if(weightfile){ load_weights(&nets[i], weightfile); } - } - network net = nets[0]; - for(i = 0; i < ngpus; ++i){ - *nets[i].seen = *net.seen; + if(clear) *nets[i].seen = 0; nets[i].learning_rate *= ngpus; } + srand(time(0)); + network net = nets[0]; int imgs = net.batch * net.subdivisions * ngpus; diff --git a/src/coco.c b/src/coco.c index b78d6402..939a08d5 100644 --- a/src/coco.c +++ b/src/coco.c @@ -12,14 +12,10 @@ #include "opencv2/highgui/highgui_c.h" #endif -void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); - char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; -image coco_labels[80]; - void train_coco(char *cfgfile, char *weightfile) { //char *train_images = "/home/pjreddie/data/voc/test/train.txt"; @@ -160,7 +156,6 @@ void validate_coco(char *cfgfile, char *weightfile) layer l = net.layers[net.n-1]; int classes = l.classes; - int square = l.sqrt; int side = l.side; int j; @@ -217,10 +212,10 @@ void validate_coco(char *cfgfile, char *weightfile) char *path = paths[i+t-nthreads]; int image_id = get_coco_image_id(path); float *X = val_resized[t].data; - float *predictions = network_predict(net, X); + network_predict(net, X); int w = val[t].w; int h = val[t].h; - convert_detections(predictions, classes, l.n, square, side, w, h, thresh, probs, boxes, 0); + get_detection_boxes(l, w, h, thresh, probs, boxes, 0); if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, iou_thresh); print_cocos(fp, image_id, boxes, probs, side*side*l.n, classes, w, h); free_image(val[t]); @@ -250,7 +245,6 @@ void validate_coco_recall(char *cfgfile, char *weightfile) layer l = net.layers[net.n-1]; int classes = l.classes; - int square = l.sqrt; int side = l.side; int j, k; @@ -282,14 +276,15 @@ void validate_coco_recall(char *cfgfile, char *weightfile) image orig = load_image_color(path, 0, 0); image sized = resize_image(orig, net.w, net.h); char *id = basecfg(path); - float *predictions = network_predict(net, sized.data); - convert_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1); + network_predict(net, sized.data); + get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1); if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh); - char *labelpath = find_replace(path, "images", "labels"); - labelpath = find_replace(labelpath, "JPEGImages", "labels"); - labelpath = find_replace(labelpath, ".jpg", ".txt"); - labelpath = find_replace(labelpath, ".JPEG", ".txt"); + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); @@ -323,7 +318,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile) void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) { - + image *alphabet = load_alphabet(); network net = parse_network_cfg(cfgfile); if(weightfile){ load_weights(&net, weightfile); @@ -353,11 +348,11 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) image sized = resize_image(im, net.w, net.h); float *X = sized.data; time=clock(); - float *predictions = network_predict(net, X); + network_predict(net, X); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0); + get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0); if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms); - draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80); + draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, alphabet, 80); save_image(im, "prediction"); show_image(im, "predictions"); free_image(im); @@ -372,12 +367,7 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) void run_coco(int argc, char **argv) { - int i; - for(i = 0; i < 80; ++i){ - char buff[256]; - sprintf(buff, "data/labels/%s.png", coco_classes[i]); - coco_labels[i] = load_image_color(buff, 0, 0); - } + char *prefix = find_char_arg(argc, argv, "-prefix", 0); float thresh = find_float_arg(argc, argv, "-thresh", .2); int cam_index = find_int_arg(argc, argv, "-c", 0); int frame_skip = find_int_arg(argc, argv, "-s", 0); @@ -394,5 +384,5 @@ void run_coco(int argc, char **argv) else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); - else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, coco_labels, 80, frame_skip); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix); } diff --git a/src/connected_layer.c b/src/connected_layer.c index f46c3e18..26942292 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -36,6 +36,10 @@ connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVAT l.weights = calloc(outputs*inputs, sizeof(float)); l.biases = calloc(outputs, sizeof(float)); + l.forward = forward_connected_layer; + l.backward = backward_connected_layer; + l.update = update_connected_layer; + //float scale = 1./sqrt(inputs); float scale = sqrt(2./inputs); for(i = 0; i < outputs*inputs; ++i){ @@ -66,6 +70,10 @@ connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVAT } #ifdef GPU + l.forward_gpu = forward_connected_layer_gpu; + l.backward_gpu = backward_connected_layer_gpu; + l.update_gpu = update_connected_layer_gpu; + l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); l.biases_gpu = cuda_make_array(l.biases, outputs); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 01bb700c..ef9c093c 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -209,6 +209,9 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); + l.forward = forward_convolutional_layer; + l.backward = backward_convolutional_layer; + l.update = update_convolutional_layer; if(binary){ l.binary_weights = calloc(c*n*size*size, sizeof(float)); l.cweights = calloc(c*n*size*size, sizeof(char)); @@ -234,6 +237,10 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int } #ifdef GPU + l.forward_gpu = forward_convolutional_layer_gpu; + l.backward_gpu = backward_convolutional_layer_gpu; + l.update_gpu = update_convolutional_layer_gpu; + if(gpu_index >= 0){ l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); diff --git a/src/cost_layer.c b/src/cost_layer.c index 0d8cb8c1..f266c6a1 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -43,7 +43,13 @@ cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float sca l.delta = calloc(inputs*batch, sizeof(float)); l.output = calloc(inputs*batch, sizeof(float)); l.cost = calloc(1, sizeof(float)); + + l.forward = forward_cost_layer; + l.backward = backward_cost_layer; #ifdef GPU + l.forward_gpu = forward_cost_layer_gpu; + l.backward_gpu = backward_cost_layer_gpu; + l.delta_gpu = cuda_make_array(l.output, inputs*batch); l.output_gpu = cuda_make_array(l.delta, inputs*batch); #endif diff --git a/src/crnn_layer.c b/src/crnn_layer.c index 5d5fa636..febff63f 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -64,7 +64,15 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou l.output = l.output_layer->output; l.delta = l.output_layer->delta; + l.forward = forward_crnn_layer; + l.backward = backward_crnn_layer; + l.update = update_crnn_layer; + #ifdef GPU + l.forward_gpu = forward_crnn_layer_gpu; + l.backward_gpu = backward_crnn_layer_gpu; + l.update_gpu = update_crnn_layer_gpu; + l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); l.output_gpu = l.output_layer->output_gpu; l.delta_gpu = l.output_layer->delta_gpu; diff --git a/src/crop_layer.c b/src/crop_layer.c index 66f11ebc..11c59b49 100644 --- a/src/crop_layer.c +++ b/src/crop_layer.c @@ -10,6 +10,9 @@ image get_crop_image(crop_layer l) return float_to_image(w,h,c,l.output); } +void backward_crop_layer(const crop_layer l, network_state state){} +void backward_crop_layer_gpu(const crop_layer l, network_state state){} + crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) { fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); @@ -30,7 +33,12 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int l.inputs = l.w * l.h * l.c; l.outputs = l.out_w * l.out_h * l.out_c; l.output = calloc(l.outputs*batch, sizeof(float)); + l.forward = forward_crop_layer; + l.backward = backward_crop_layer; + #ifdef GPU + l.forward_gpu = forward_crop_layer_gpu; + l.backward_gpu = backward_crop_layer_gpu; l.output_gpu = cuda_make_array(l.output, l.outputs*batch); l.rand_gpu = cuda_make_array(0, l.batch*8); #endif diff --git a/src/darknet.c b/src/darknet.c index 1b723298..3bc0c6a7 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -136,17 +136,6 @@ void partial(char *cfgfile, char *weightfile, char *outfile, int max) save_weights_upto(net, outfile, max); } -void stacked(char *cfgfile, char *weightfile, char *outfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - net.seen = 0; - save_weights_double(net, outfile); -} - #include "convolutional_layer.h" void rescale_net(char *cfgfile, char *weightfile, char *outfile) { @@ -420,8 +409,6 @@ int main(int argc, char **argv) partial(argv[2], argv[3], argv[4], atoi(argv[5])); } else if (0 == strcmp(argv[1], "average")){ average(argc, argv); - } else if (0 == strcmp(argv[1], "stacked")){ - stacked(argv[2], argv[3], argv[4]); } else if (0 == strcmp(argv[1], "visualize")){ visualize(argv[2], (argc > 3) ? argv[3] : 0); } else if (0 == strcmp(argv[1], "imtest")){ diff --git a/src/data.c b/src/data.c index 5977a3fb..20d57481 100644 --- a/src/data.c +++ b/src/data.c @@ -47,7 +47,7 @@ char **get_random_paths(char **paths, int n, int m) for(i = 0; i < n; ++i){ int index = rand()%m; random_paths[i] = paths[index]; - if(i == 0) printf("%s\n", paths[index]); + //if(i == 0) printf("%s\n", paths[index]); } pthread_mutex_unlock(&mutex); return random_paths; @@ -58,7 +58,8 @@ char **find_replace_paths(char **paths, int n, char *find, char *replace) char **replace_paths = calloc(n, sizeof(char*)); int i; for(i = 0; i < n; ++i){ - char *replaced = find_replace(paths[i], find, replace); + char replaced[4096]; + find_replace(paths[i], find, replace, replaced); replace_paths[i] = copy_string(replaced); } return replace_paths; @@ -198,12 +199,13 @@ void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) { - char *labelpath = find_replace(path, "images", "labels"); - labelpath = find_replace(labelpath, "JPEGImages", "labels"); + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); - labelpath = find_replace(labelpath, ".jpg", ".txt"); - labelpath = find_replace(labelpath, ".JPG", ".txt"); - labelpath = find_replace(labelpath, ".JPEG", ".txt"); int count = 0; box_label *boxes = read_boxes(labelpath, &count); randomize_boxes(boxes, count); @@ -235,13 +237,14 @@ void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) { - char *labelpath = find_replace(path, "images", "labels"); - labelpath = find_replace(labelpath, "JPEGImages", "labels"); + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); - labelpath = find_replace(labelpath, ".jpg", ".txt"); - labelpath = find_replace(labelpath, ".png", ".txt"); - labelpath = find_replace(labelpath, ".JPG", ".txt"); - labelpath = find_replace(labelpath, ".JPEG", ".txt"); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); int count = 0; box_label *boxes = read_boxes(labelpath, &count); randomize_boxes(boxes, count); @@ -282,13 +285,14 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) { - char *labelpath = find_replace(path, "images", "labels"); - labelpath = find_replace(labelpath, "JPEGImages", "labels"); + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); - labelpath = find_replace(labelpath, ".jpg", ".txt"); - labelpath = find_replace(labelpath, ".png", ".txt"); - labelpath = find_replace(labelpath, ".JPG", ".txt"); - labelpath = find_replace(labelpath, ".JPEG", ".txt"); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); int count = 0; box_label *boxes = read_boxes(labelpath, &count); randomize_boxes(boxes, count); @@ -400,11 +404,12 @@ matrix load_tags_paths(char **paths, int n, int k) int i; int count = 0; for(i = 0; i < n; ++i){ - char *label = find_replace(paths[i], "imgs", "labels"); - label = find_replace(label, "_iconl.jpeg", ".txt"); + char label[4096]; + find_replace(paths[i], "imgs", "labels", label); + find_replace(label, "_iconl.jpeg", ".txt", label); FILE *file = fopen(label, "r"); if(!file){ - label = find_replace(label, "labels", "labels2"); + find_replace(label, "labels", "labels2", label); file = fopen(label, "r"); if(!file) continue; } @@ -518,16 +523,18 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h) int id; float iou; - char *imlabel1 = find_replace(paths[i*2], "imgs", "labels"); - imlabel1 = find_replace(imlabel1, "jpg", "txt"); + char imlabel1[4096]; + char imlabel2[4096]; + find_replace(paths[i*2], "imgs", "labels", imlabel1); + find_replace(imlabel1, "jpg", "txt", imlabel1); FILE *fp1 = fopen(imlabel1, "r"); while(fscanf(fp1, "%d %f", &id, &iou) == 2){ if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; } - char *imlabel2 = find_replace(paths[i*2+1], "imgs", "labels"); - imlabel2 = find_replace(imlabel2, "jpg", "txt"); + find_replace(paths[i*2+1], "imgs", "labels", imlabel2); + find_replace(imlabel2, "jpg", "txt", imlabel2); FILE *fp2 = fopen(imlabel2, "r"); while(fscanf(fp2, "%d %f", &id, &iou) == 2){ @@ -709,6 +716,7 @@ void *load_threads(void *ptr) { int i; load_args args = *(load_args *)ptr; + if (args.threads == 0) args.threads = 1; data *out = args.d; int total = args.n; free(ptr); diff --git a/src/deconvolutional_layer.c b/src/deconvolutional_layer.c index 1262238f..fbef9d58 100644 --- a/src/deconvolutional_layer.c +++ b/src/deconvolutional_layer.c @@ -80,6 +80,10 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); + l.forward = forward_deconvolutional_layer; + l.backward = backward_deconvolutional_layer; + l.update = update_deconvolutional_layer; + #ifdef GPU l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); diff --git a/src/demo.c b/src/demo.c index 7c480b7d..6c653a9f 100644 --- a/src/demo.c +++ b/src/demo.c @@ -1,5 +1,6 @@ #include "network.h" #include "detection_layer.h" +#include "region_layer.h" #include "cost_layer.h" #include "utils.h" #include "parser.h" @@ -13,10 +14,10 @@ #ifdef OPENCV #include "opencv2/highgui/highgui_c.h" #include "opencv2/imgproc/imgproc_c.h" -void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); +image get_image_from_stream(CvCapture *cap); static char **demo_names; -static image *demo_labels; +static image *demo_alphabet; static int demo_classes; static float **probs; @@ -50,16 +51,23 @@ void *detect_in_thread(void *ptr) { float nms = .4; - detection_layer l = net.layers[net.n-1]; + layer l = net.layers[net.n-1]; float *X = det_s.data; float *prediction = network_predict(net, X); memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float)); mean_arrays(predictions, FRAMES, l.outputs, avg); + l.output = avg; free_image(det_s); - convert_detections(avg, l.classes, l.n, l.sqrt, l.side, 1, 1, demo_thresh, probs, boxes, 0); - if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms); + if(l.type == DETECTION){ + get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else if (l.type == REGION){ + get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else { + error("Last layer must produce detections\n"); + } + if (nms > 0) do_nms(boxes, probs, l.w*l.h*l.n, l.classes, nms); printf("\033[2J"); printf("\033[1;1H"); printf("\nFPS:%.1f\n",fps); @@ -69,7 +77,7 @@ void *detect_in_thread(void *ptr) det = images[(demo_index + FRAMES/2 + 1)%FRAMES]; demo_index = (demo_index + 1)%FRAMES; - draw_detections(det, l.side*l.side*l.n, demo_thresh, boxes, probs, demo_names, demo_labels, demo_classes); + draw_detections(det, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes); return 0; } @@ -83,12 +91,13 @@ double get_wall_time() return (double)time.tv_sec + (double)time.tv_usec * .000001; } -void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip) +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix) { //skip = frame_skip; + image *alphabet = load_alphabet(); int delay = frame_skip; demo_names = names; - demo_labels = labels; + demo_alphabet = alphabet; demo_classes = classes; demo_thresh = thresh; printf("Demo\n"); @@ -108,16 +117,16 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch if(!cap) error("Couldn't connect to webcam.\n"); - detection_layer l = net.layers[net.n-1]; + layer l = net.layers[net.n-1]; int j; avg = (float *) calloc(l.outputs, sizeof(float)); for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3); - boxes = (box *)calloc(l.side*l.side*l.n, sizeof(box)); - probs = (float **)calloc(l.side*l.side*l.n, sizeof(float *)); - for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *)); + boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); + probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *)); pthread_t fetch_thread; pthread_t detect_thread; @@ -141,9 +150,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch } int count = 0; - cvNamedWindow("Demo", CV_WINDOW_NORMAL); - cvMoveWindow("Demo", 0, 0); - cvResizeWindow("Demo", 1352, 1013); + if(!prefix){ + cvNamedWindow("Demo", CV_WINDOW_NORMAL); + cvMoveWindow("Demo", 0, 0); + cvResizeWindow("Demo", 1352, 1013); + } double before = get_wall_time(); @@ -153,7 +164,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); - if(1){ + if(!prefix){ show_image(disp, "Demo"); int c = cvWaitKey(1); if (c == 10){ @@ -164,7 +175,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch } }else{ char buff[256]; - sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count); + sprintf(buff, "%s_%08d", prefix, count); save_image(disp, buff); } @@ -201,7 +212,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch } } #else -void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip) +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix) { fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); } diff --git a/src/demo.h b/src/demo.h index 0e694bd5..5f922717 100644 --- a/src/demo.h +++ b/src/demo.h @@ -2,6 +2,6 @@ #define DEMO #include "image.h" -void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip); +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix); #endif diff --git a/src/detection_layer.c b/src/detection_layer.c index 1fe67677..6ee7f648 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -30,7 +30,12 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int l.truths = l.side*l.side*(1+l.coords+l.classes); l.output = calloc(batch*l.outputs, sizeof(float)); l.delta = calloc(batch*l.outputs, sizeof(float)); + + l.forward = forward_detection_layer; + l.backward = backward_detection_layer; #ifdef GPU + l.forward_gpu = forward_detection_layer_gpu; + l.backward_gpu = backward_detection_layer_gpu; l.output_gpu = cuda_make_array(l.output, batch*l.outputs); l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); #endif @@ -216,6 +221,35 @@ void backward_detection_layer(const detection_layer l, network_state state) axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); } +void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness) +{ + int i,j,n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.side*l.side; ++i){ + int row = i / l.side; + int col = i % l.side; + for(n = 0; n < l.n; ++n){ + int index = i*l.n + n; + int p_index = l.side*l.side*l.classes + i*l.n + n; + float scale = predictions[p_index]; + int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; + boxes[index].x = (predictions[box_index + 0] + col) / l.side * w; + boxes[index].y = (predictions[box_index + 1] + row) / l.side * h; + boxes[index].w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; + boxes[index].h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + for(j = 0; j < l.classes; ++j){ + int class_index = i*l.classes; + float prob = scale*predictions[class_index+j]; + probs[index][j] = (prob > thresh) ? prob : 0; + } + if(only_objectness){ + probs[index][0] = scale; + } + } + } +} + #ifdef GPU void forward_detection_layer_gpu(const detection_layer l, network_state state) diff --git a/src/detection_layer.h b/src/detection_layer.h index e8c3a725..e847a094 100644 --- a/src/detection_layer.h +++ b/src/detection_layer.h @@ -9,6 +9,7 @@ typedef layer detection_layer; detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); void forward_detection_layer(const detection_layer l, network_state state); void backward_detection_layer(const detection_layer l, network_state state); +void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); #ifdef GPU void forward_detection_layer_gpu(const detection_layer l, network_state state); diff --git a/src/detector.c b/src/detector.c index 94987507..1f48c618 100644 --- a/src/detector.c +++ b/src/detector.c @@ -1,16 +1,16 @@ #include "network.h" -#include "detection_layer.h" +#include "region_layer.h" #include "cost_layer.h" #include "utils.h" #include "parser.h" #include "box.h" +#include "demo.h" #ifdef OPENCV #include "opencv2/highgui/highgui_c.h" #endif static char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; -static image voc_labels[20]; void train_detector(char *cfgfile, char *weightfile) { @@ -49,13 +49,14 @@ void train_detector(char *cfgfile, char *weightfile) args.num_boxes = l.max_boxes; args.d = &buffer; args.type = DETECTION_DATA; + args.threads = 4; args.angle = net.angle; args.exposure = net.exposure; args.saturation = net.saturation; args.hue = net.hue; - pthread_t load_thread = load_data_in_thread(args); + pthread_t load_thread = load_data(args); clock_t time; //while(i*imgs < N*120){ while(get_current_batch(net) < net.max_batches){ @@ -63,7 +64,7 @@ void train_detector(char *cfgfile, char *weightfile) time=clock(); pthread_join(load_thread, 0); train = buffer; - load_thread = load_data_in_thread(args); + load_thread = load_data(args); /* int k; @@ -102,44 +103,6 @@ void train_detector(char *cfgfile, char *weightfile) save_weights(net, buff); } -static void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness) -{ - int i,j,n; - //int per_cell = 5*num+classes; - for (i = 0; i < side*side; ++i){ - int row = i / side; - int col = i % side; - for(n = 0; n < num; ++n){ - int index = i*num + n; - int p_index = index * (classes + 5) + 4; - float scale = predictions[p_index]; - int box_index = index * (classes + 5); - boxes[index].x = (predictions[box_index + 0] + col + .5) / side * w; - boxes[index].y = (predictions[box_index + 1] + row + .5) / side * h; - if(0){ - boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / side * w; - boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / side * h; - } - boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (square?2:1)) * w; - boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (square?2:1)) * h; - if(1){ - boxes[index].x = ((col + .5)/side + predictions[box_index + 0] * .5) * w; - boxes[index].y = ((row + .5)/side + predictions[box_index + 1] * .5) * h; - boxes[index].w = (exp(predictions[box_index + 2]) * .5) * w; - boxes[index].h = (exp(predictions[box_index + 3]) * .5) * h; - } - for(j = 0; j < classes; ++j){ - int class_index = index * (classes + 5) + 5; - float prob = scale*predictions[class_index+j]; - probs[index][j] = (prob > thresh) ? prob : 0; - } - if(only_objectness){ - probs[index][0] = scale; - } - } - } -} - void print_detector_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h) { int i, j; @@ -179,7 +142,6 @@ void validate_detector(char *cfgfile, char *weightfile) layer l = net.layers[net.n-1]; int classes = l.classes; - int side = l.w; int j; FILE **fps = calloc(classes, sizeof(FILE *)); @@ -188,9 +150,9 @@ void validate_detector(char *cfgfile, char *weightfile) snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); fps[j] = fopen(buff, "w"); } - box *boxes = calloc(side*side*l.n, sizeof(box)); - float **probs = calloc(side*side*l.n, sizeof(float *)); - for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); + box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); + float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); int m = plist->size; int i=0; @@ -235,12 +197,12 @@ void validate_detector(char *cfgfile, char *weightfile) char *path = paths[i+t-nthreads]; char *id = basecfg(path); float *X = val_resized[t].data; - float *predictions = network_predict(net, X); + network_predict(net, X); int w = val[t].w; int h = val[t].h; - convert_detections(predictions, classes, l.n, 0, side, w, h, thresh, probs, boxes, 0); - if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, nms); - print_detector_detections(fps, id, boxes, probs, side*side*l.n, classes, w, h); + get_region_boxes(l, w, h, thresh, probs, boxes, 0); + if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms); + print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h); free(id); free_image(val[t]); free_image(val_resized[t]); @@ -268,8 +230,6 @@ void validate_detector_recall(char *cfgfile, char *weightfile) layer l = net.layers[net.n-1]; int classes = l.classes; - int square = l.sqrt; - int side = l.side; int j, k; FILE **fps = calloc(classes, sizeof(FILE *)); @@ -278,9 +238,9 @@ void validate_detector_recall(char *cfgfile, char *weightfile) snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); fps[j] = fopen(buff, "w"); } - box *boxes = calloc(side*side*l.n, sizeof(box)); - float **probs = calloc(side*side*l.n, sizeof(float *)); - for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); + box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); + float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); int m = plist->size; int i=0; @@ -299,18 +259,19 @@ void validate_detector_recall(char *cfgfile, char *weightfile) image orig = load_image_color(path, 0, 0); image sized = resize_image(orig, net.w, net.h); char *id = basecfg(path); - float *predictions = network_predict(net, sized.data); - convert_detections(predictions, classes, l.n, square, l.w, 1, 1, thresh, probs, boxes, 1); - if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms); + network_predict(net, sized.data); + get_region_boxes(l, 1, 1, thresh, probs, boxes, 1); + if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms); - char *labelpath = find_replace(path, "images", "labels"); - labelpath = find_replace(labelpath, "JPEGImages", "labels"); - labelpath = find_replace(labelpath, ".jpg", ".txt"); - labelpath = find_replace(labelpath, ".JPEG", ".txt"); + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); - for(k = 0; k < side*side*l.n; ++k){ + for(k = 0; k < l.w*l.h*l.n; ++k){ if(probs[k][0] > thresh){ ++proposals; } @@ -319,7 +280,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile) ++total; box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; float best_iou = 0; - for(k = 0; k < side*side*l.n; ++k){ + for(k = 0; k < l.w*l.h*l.n; ++k){ float iou = box_iou(boxes[k], t); if(probs[k][0] > thresh && iou > best_iou){ best_iou = iou; @@ -340,13 +301,12 @@ void validate_detector_recall(char *cfgfile, char *weightfile) void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh) { - + image *alphabet = load_alphabet(); network net = parse_network_cfg(cfgfile); if(weightfile){ load_weights(&net, weightfile); } - detection_layer l = net.layers[net.n-1]; - l.side = l.w; + layer l = net.layers[net.n-1]; set_batch_network(&net, 1); srand(2222222); clock_t time; @@ -354,9 +314,9 @@ void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh char *input = buff; int j; float nms=.4; - box *boxes = calloc(l.side*l.side*l.n, sizeof(box)); - float **probs = calloc(l.side*l.side*l.n, sizeof(float *)); - for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); + box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); + float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); while(1){ if(filename){ strncpy(input, filename, 256); @@ -371,12 +331,12 @@ void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh image sized = resize_image(im, net.w, net.h); float *X = sized.data; time=clock(); - float *predictions = network_predict(net, X); + network_predict(net, X); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - convert_detections(predictions, l.classes, l.n, 0, l.w, 1, 1, thresh, probs, boxes, 0); - if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms); - //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20); - draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20); + get_region_boxes(l, 1, 1, thresh, probs, boxes, 0); + if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); + //draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, voc_names, voc_labels, 20); + draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, voc_names, alphabet, 20); save_image(im, "predictions"); show_image(im, "predictions"); @@ -392,14 +352,10 @@ void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh void run_detector(int argc, char **argv) { - int i; - for(i = 0; i < 20; ++i){ - char buff[256]; - sprintf(buff, "data/labels/%s.png", voc_names[i]); - voc_labels[i] = load_image_color(buff, 0, 0); - } - + char *prefix = find_char_arg(argc, argv, "-prefix", 0); float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); if(argc < 4){ fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; @@ -412,4 +368,5 @@ void run_detector(int argc, char **argv) else if(0==strcmp(argv[2], "train")) train_detector(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_detector(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix); } diff --git a/src/dropout_layer.c b/src/dropout_layer.c index 29b9193c..82be64b1 100644 --- a/src/dropout_layer.c +++ b/src/dropout_layer.c @@ -15,7 +15,11 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability) l.batch = batch; l.rand = calloc(inputs*batch, sizeof(float)); l.scale = 1./(1.-probability); + l.forward = forward_dropout_layer; + l.backward = backward_dropout_layer; #ifdef GPU + l.forward_gpu = forward_dropout_layer_gpu; + l.backward_gpu = backward_dropout_layer_gpu; l.rand_gpu = cuda_make_array(l.rand, inputs*batch); #endif return l; diff --git a/src/gru_layer.c b/src/gru_layer.c index 4c720ce3..b78e8682 100644 --- a/src/gru_layer.c +++ b/src/gru_layer.c @@ -85,7 +85,15 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no l.z_cpu = calloc(outputs*batch, sizeof(float)); l.h_cpu = calloc(outputs*batch, sizeof(float)); + l.forward = forward_gru_layer; + l.backward = backward_gru_layer; + l.update = update_gru_layer; + #ifdef GPU + l.forward_gpu = forward_gru_layer_gpu; + l.backward_gpu = backward_gru_layer_gpu; + l.update_gpu = update_gru_layer_gpu; + l.forgot_state_gpu = cuda_make_array(l.output, batch*outputs); l.forgot_delta_gpu = cuda_make_array(l.output, batch*outputs); l.prev_state_gpu = cuda_make_array(l.output, batch*outputs); diff --git a/src/gru_layer.h b/src/gru_layer.h index bb9478b9..9e19cee1 100644 --- a/src/gru_layer.h +++ b/src/gru_layer.h @@ -1,24 +1,23 @@ -#ifndef RNN_LAYER_H -#define RNN_LAYER_H +#ifndef GRU_LAYER_H +#define GRU_LAYER_H #include "activations.h" #include "layer.h" #include "network.h" -#define USET -layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log); +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize); -void forward_rnn_layer(layer l, network_state state); -void backward_rnn_layer(layer l, network_state state); -void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay); +void forward_gru_layer(layer l, network_state state); +void backward_gru_layer(layer l, network_state state); +void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay); #ifdef GPU -void forward_rnn_layer_gpu(layer l, network_state state); -void backward_rnn_layer_gpu(layer l, network_state state); -void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); -void push_rnn_layer(layer l); -void pull_rnn_layer(layer l); +void forward_gru_layer_gpu(layer l, network_state state); +void backward_gru_layer_gpu(layer l, network_state state); +void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); +void push_gru_layer(layer l); +void pull_gru_layer(layer l); #endif #endif diff --git a/src/image.c b/src/image.c index 21c2f8bb..09718fbe 100644 --- a/src/image.c +++ b/src/image.c @@ -10,6 +10,12 @@ #define STB_IMAGE_WRITE_IMPLEMENTATION #include "stb_image_write.h" +#ifdef OPENCV +#include "opencv2/highgui/highgui_c.h" +#include "opencv2/imgproc/imgproc_c.h" +#endif + + int windows = 0; float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; @@ -25,10 +31,66 @@ float get_color(int c, int x, int max) return r; } +void composite_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x, y, k); + float val2 = get_pixel_extend(dest, dx+x, dy+y, k); + set_pixel(dest, dx+x, dy+y, k, val * val2); + } + } + } +} + +image border_image(image a, int border) +{ + image b = make_image(a.w + 2*border, a.h + 2*border, a.c); + int x,y,k; + for(k = 0; k < b.c; ++k){ + for(y = 0; y < b.h; ++y){ + for(x = 0; x < b.w; ++x){ + float val = get_pixel_extend(a, x - border, y - border, k); + set_pixel(b, x, y, k, val); + } + } + } + return b; +} + +image tile_images(image a, image b, int dx) +{ + if(a.w == 0) return copy_image(b); + image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? a.c : b.c); + fill_cpu(c.w*c.h*c.c, 1, c.data, 1); + embed_image(a, c, 0, 0); + composite_image(b, c, a.w + dx, 0); + return c; +} + +image get_label(image *characters, char *string) +{ + image label = make_empty_image(0,0,0); + while(*string){ + image l = characters[(int)*string]; + image n = tile_images(label, l, -4); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; +} + void draw_label(image a, int r, int c, image label, const float *rgb) { float ratio = (float) label.w / label.h; - int h = label.h; + int h = a.h * .04; + h = label.h; + h = a.h * .06; int w = ratio * h; image rl = resize_image(label, w, h); if (r - h >= 0) r = r - h; @@ -102,7 +164,19 @@ void draw_bbox(image a, box bbox, int w, float r, float g, float b) } } -void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes) +image *load_alphabet() +{ + int i; + image *alphabet = calloc(128, sizeof(image)); + for(i = 32; i < 127; ++i){ + char buff[256]; + sprintf(buff, "data/labels/%d.png", i); + alphabet[i] = load_image_color(buff, 0, 0); + } + return alphabet; +} + +void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *alphabet, int classes) { int i; @@ -111,7 +185,7 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, float prob = probs[i][class]; if(prob > thresh){ //int width = pow(prob, 1./2.)*30+1; - int width = 8; + int width = im.h * .012; printf("%s: %.0f%%\n", names[class], prob*100); int offset = class*1 % classes; float red = get_color(2,offset,classes); @@ -137,7 +211,10 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, if(bot > im.h-1) bot = im.h-1; draw_box_width(im, left, top, right, bot, width, red, green, blue); - if (labels) draw_label(im, top + width, left, labels[class], rgb); + if (alphabet) { + image label = get_label(alphabet, names[class]); + draw_label(im, top + width, left, label, rgb); + } } } } @@ -368,6 +445,53 @@ void show_image(image p, const char *name) } #ifdef OPENCV + +image ipl_to_image(IplImage* src) +{ + unsigned char *data = (unsigned char *)src->imageData; + int h = src->height; + int w = src->width; + int c = src->nChannels; + int step = src->widthStep; + image out = make_image(w, h, c); + int i, j, k, count=0;; + + for(k= 0; k < c; ++k){ + for(i = 0; i < h; ++i){ + for(j = 0; j < w; ++j){ + out.data[count++] = data[i*step + j*c + k]/255.; + } + } + } + return out; +} + +image load_image_cv(char *filename, int channels) +{ + IplImage* src = 0; + int flag = -1; + if (channels == 0) flag = -1; + else if (channels == 1) flag = 0; + else if (channels == 3) flag = 1; + else { + fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); + } + + if( (src = cvLoadImage(filename, flag)) == 0 ) + { + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + return make_image(10,10,3); + //exit(0); + } + image out = ipl_to_image(src); + cvReleaseImage(&src); + rgbgr_image(out); + return out; +} + image get_image_from_stream(CvCapture *cap) { IplImage* src = cvQueryFrame(cap); @@ -376,9 +500,7 @@ image get_image_from_stream(CvCapture *cap) rgbgr_image(im); return im; } -#endif -#ifdef OPENCV void save_image_jpg(image p, const char *name) { image copy = copy_image(p); @@ -980,7 +1102,7 @@ void test_resize(char *filename) image aug = random_augment_image(im, 0, 320, 448, 320, .75); show_image(aug, "aug"); free_image(aug); - + float exposure = 1.15; float saturation = 1.15; @@ -1001,55 +1123,6 @@ void test_resize(char *filename) #endif } -#ifdef OPENCV -image ipl_to_image(IplImage* src) -{ - unsigned char *data = (unsigned char *)src->imageData; - int h = src->height; - int w = src->width; - int c = src->nChannels; - int step = src->widthStep; - image out = make_image(w, h, c); - int i, j, k, count=0;; - - for(k= 0; k < c; ++k){ - for(i = 0; i < h; ++i){ - for(j = 0; j < w; ++j){ - out.data[count++] = data[i*step + j*c + k]/255.; - } - } - } - return out; -} - -image load_image_cv(char *filename, int channels) -{ - IplImage* src = 0; - int flag = -1; - if (channels == 0) flag = -1; - else if (channels == 1) flag = 0; - else if (channels == 3) flag = 1; - else { - fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); - } - - if( (src = cvLoadImage(filename, flag)) == 0 ) - { - fprintf(stderr, "Cannot load image \"%s\"\n", filename); - char buff[256]; - sprintf(buff, "echo %s >> bad.list", filename); - system(buff); - return make_image(10,10,3); - //exit(0); - } - image out = ipl_to_image(src); - cvReleaseImage(&src); - rgbgr_image(out); - return out; -} - -#endif - image load_image_stb(char *filename, int channels) { @@ -1122,6 +1195,7 @@ float get_pixel_extend(image m, int x, int y, int c) } void set_pixel(image m, int x, int y, int c, float val) { + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; assert(x < m.w && y < m.h && c < m.c); m.data[c*m.h*m.w + y*m.w + x] = val; } @@ -1247,5 +1321,7 @@ void show_images(image *ims, int n, char *window) void free_image(image m) { - free(m.data); + if(m.data){ + free(m.data); + } } diff --git a/src/image.h b/src/image.h index e1248609..7e7ecf6d 100644 --- a/src/image.h +++ b/src/image.h @@ -8,11 +8,6 @@ #include #include "box.h" -#ifdef OPENCV -#include "opencv2/highgui/highgui_c.h" -#include "opencv2/imgproc/imgproc_c.h" -#endif - typedef struct { int h; int w; @@ -26,6 +21,7 @@ void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); void draw_bbox(image a, box bbox, int w, float r, float g, float b); void draw_label(image a, int r, int c, image label, const float *rgb); +void write_label(image a, int r, int c, image *characters, char *string, float *rgb); void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes); image image_distance(image a, image b); void scale_image(image m, float s); @@ -64,12 +60,6 @@ void show_images(image *ims, int n, char *window); void show_image_layers(image p, char *name); void show_image_collapsed(image p, char *name); -#ifdef OPENCV -void save_image_jpg(image p, const char *name); -image get_image_from_stream(CvCapture *cap); -image ipl_to_image(IplImage* src); -#endif - void print_image(image m); image make_image(int w, int h, int c); @@ -79,6 +69,7 @@ image float_to_image(int w, int h, int c, float *data); image copy_image(image p); image load_image(char *filename, int w, int h, int c); image load_image_color(char *filename, int w, int h); +image *load_alphabet(); float get_pixel(image m, int x, int y, int c); float get_pixel_extend(image m, int x, int y, int c); diff --git a/src/layer.h b/src/layer.h index 7dbbfb9f..ea6862b8 100644 --- a/src/layer.h +++ b/src/layer.h @@ -4,6 +4,8 @@ #include "activations.h" #include "stddef.h" +struct network_state; + struct layer; typedef struct layer layer; @@ -42,6 +44,12 @@ struct layer{ LAYER_TYPE type; ACTIVATION activation; COST_TYPE cost_type; + void (*forward) (struct layer, struct network_state); + void (*backward) (struct layer, struct network_state); + void (*update) (struct layer, int, float, float, float); + void (*forward_gpu) (struct layer, struct network_state); + void (*backward_gpu) (struct layer, struct network_state); + void (*update_gpu) (struct layer, int, float, float, float); int batch_normalize; int shortcut; int batch; diff --git a/src/local_layer.c b/src/local_layer.c index 3696f846..31f0ca6b 100644 --- a/src/local_layer.c +++ b/src/local_layer.c @@ -60,8 +60,16 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float)); l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); + + l.forward = forward_local_layer; + l.backward = backward_local_layer; + l.update = update_local_layer; #ifdef GPU + l.forward_gpu = forward_local_layer_gpu; + l.backward_gpu = backward_local_layer_gpu; + l.update_gpu = update_local_layer_gpu; + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations); l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations); diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 3e0ea156..49cfeaf5 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -39,7 +39,11 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s l.indexes = calloc(output_size, sizeof(int)); l.output = calloc(output_size, sizeof(float)); l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_maxpool_layer; + l.backward = backward_maxpool_layer; #ifdef GPU + l.forward_gpu = forward_maxpool_layer_gpu; + l.backward_gpu = backward_maxpool_layer_gpu; l.indexes_gpu = cuda_make_int_array(output_size); l.output_gpu = cuda_make_array(l.output, output_size); l.delta_gpu = cuda_make_array(l.delta, output_size); diff --git a/src/network.c b/src/network.c index 72c89432..01b79622 100644 --- a/src/network.c +++ b/src/network.c @@ -15,7 +15,6 @@ #include "local_layer.h" #include "convolutional_layer.h" #include "activation_layer.h" -#include "deconvolutional_layer.h" #include "detection_layer.h" #include "region_layer.h" #include "normalization_layer.h" @@ -153,49 +152,7 @@ void forward_network(network net, network_state state) if(l.delta){ scal_cpu(l.outputs * l.batch, 0, l.delta, 1); } - if(l.type == CONVOLUTIONAL){ - forward_convolutional_layer(l, state); - } else if(l.type == DECONVOLUTIONAL){ - forward_deconvolutional_layer(l, state); - } else if(l.type == ACTIVE){ - forward_activation_layer(l, state); - } else if(l.type == LOCAL){ - forward_local_layer(l, state); - } else if(l.type == NORMALIZATION){ - forward_normalization_layer(l, state); - } else if(l.type == BATCHNORM){ - forward_batchnorm_layer(l, state); - } else if(l.type == DETECTION){ - forward_detection_layer(l, state); - } else if(l.type == REGION){ - forward_region_layer(l, state); - } else if(l.type == CONNECTED){ - forward_connected_layer(l, state); - } else if(l.type == RNN){ - forward_rnn_layer(l, state); - } else if(l.type == GRU){ - forward_gru_layer(l, state); - } else if(l.type == CRNN){ - forward_crnn_layer(l, state); - } else if(l.type == CROP){ - forward_crop_layer(l, state); - } else if(l.type == COST){ - forward_cost_layer(l, state); - } else if(l.type == SOFTMAX){ - forward_softmax_layer(l, state); - } else if(l.type == MAXPOOL){ - forward_maxpool_layer(l, state); - } else if(l.type == REORG){ - forward_reorg_layer(l, state); - } else if(l.type == AVGPOOL){ - forward_avgpool_layer(l, state); - } else if(l.type == DROPOUT){ - forward_dropout_layer(l, state); - } else if(l.type == ROUTE){ - forward_route_layer(l, net); - } else if(l.type == SHORTCUT){ - forward_shortcut_layer(l, state); - } + l.forward(l, state); state.input = l.output; } } @@ -207,29 +164,17 @@ void update_network(network net) float rate = get_current_rate(net); for(i = 0; i < net.n; ++i){ layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ - update_convolutional_layer(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == DECONVOLUTIONAL){ - update_deconvolutional_layer(l, rate, net.momentum, net.decay); - } else if(l.type == CONNECTED){ - update_connected_layer(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == RNN){ - update_rnn_layer(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == GRU){ - update_gru_layer(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == CRNN){ - update_crnn_layer(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == LOCAL){ - update_local_layer(l, update_batch, rate, net.momentum, net.decay); + if(l.update){ + l.update(l, update_batch, rate, net.momentum, net.decay); } } } float *get_network_output(network net) { - #ifdef GPU - if (gpu_index >= 0) return get_network_output_gpu(net); - #endif +#ifdef GPU + if (gpu_index >= 0) return get_network_output_gpu(net); +#endif int i; for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break; return net.layers[i].output; @@ -273,47 +218,7 @@ void backward_network(network net, network_state state) state.delta = prev.delta; } layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ - backward_convolutional_layer(l, state); - } else if(l.type == DECONVOLUTIONAL){ - backward_deconvolutional_layer(l, state); - } else if(l.type == ACTIVE){ - backward_activation_layer(l, state); - } else if(l.type == NORMALIZATION){ - backward_normalization_layer(l, state); - } else if(l.type == BATCHNORM){ - backward_batchnorm_layer(l, state); - } else if(l.type == MAXPOOL){ - if(i != 0) backward_maxpool_layer(l, state); - } else if(l.type == REORG){ - backward_reorg_layer(l, state); - } else if(l.type == AVGPOOL){ - backward_avgpool_layer(l, state); - } else if(l.type == DROPOUT){ - backward_dropout_layer(l, state); - } else if(l.type == DETECTION){ - backward_detection_layer(l, state); - } else if(l.type == REGION){ - backward_region_layer(l, state); - } else if(l.type == SOFTMAX){ - if(i != 0) backward_softmax_layer(l, state); - } else if(l.type == CONNECTED){ - backward_connected_layer(l, state); - } else if(l.type == RNN){ - backward_rnn_layer(l, state); - } else if(l.type == GRU){ - backward_gru_layer(l, state); - } else if(l.type == CRNN){ - backward_crnn_layer(l, state); - } else if(l.type == LOCAL){ - backward_local_layer(l, state); - } else if(l.type == COST){ - backward_cost_layer(l, state); - } else if(l.type == ROUTE){ - backward_route_layer(l, net); - } else if(l.type == SHORTCUT){ - backward_shortcut_layer(l, state); - } + l.backward(l, state); } } @@ -406,11 +311,11 @@ void set_batch_network(network *net, int b) int i; for(i = 0; i < net->n; ++i){ net->layers[i].batch = b; - #ifdef CUDNN +#ifdef CUDNN if(net->layers[i].type == CONVOLUTIONAL){ cudnn_convolutional_setup(net->layers + i); } - #endif +#endif } } diff --git a/src/network_kernels.cu b/src/network_kernels.cu index b7d1d2b7..e3190680 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -22,7 +22,6 @@ extern "C" { #include "region_layer.h" #include "convolutional_layer.h" #include "activation_layer.h" -#include "deconvolutional_layer.h" #include "maxpool_layer.h" #include "reorg_layer.h" #include "avgpool_layer.h" @@ -51,49 +50,7 @@ void forward_network_gpu(network net, network_state state) if(l.delta_gpu){ fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1); } - if(l.type == CONVOLUTIONAL){ - forward_convolutional_layer_gpu(l, state); - } else if(l.type == DECONVOLUTIONAL){ - forward_deconvolutional_layer_gpu(l, state); - } else if(l.type == ACTIVE){ - forward_activation_layer_gpu(l, state); - } else if(l.type == LOCAL){ - forward_local_layer_gpu(l, state); - } else if(l.type == DETECTION){ - forward_detection_layer_gpu(l, state); - } else if(l.type == REGION){ - forward_region_layer_gpu(l, state); - } else if(l.type == CONNECTED){ - forward_connected_layer_gpu(l, state); - } else if(l.type == RNN){ - forward_rnn_layer_gpu(l, state); - } else if(l.type == GRU){ - forward_gru_layer_gpu(l, state); - } else if(l.type == CRNN){ - forward_crnn_layer_gpu(l, state); - } else if(l.type == CROP){ - forward_crop_layer_gpu(l, state); - } else if(l.type == COST){ - forward_cost_layer_gpu(l, state); - } else if(l.type == SOFTMAX){ - forward_softmax_layer_gpu(l, state); - } else if(l.type == NORMALIZATION){ - forward_normalization_layer_gpu(l, state); - } else if(l.type == BATCHNORM){ - forward_batchnorm_layer_gpu(l, state); - } else if(l.type == MAXPOOL){ - forward_maxpool_layer_gpu(l, state); - } else if(l.type == REORG){ - forward_reorg_layer_gpu(l, state); - } else if(l.type == AVGPOOL){ - forward_avgpool_layer_gpu(l, state); - } else if(l.type == DROPOUT){ - forward_dropout_layer_gpu(l, state); - } else if(l.type == ROUTE){ - forward_route_layer_gpu(l, net); - } else if(l.type == SHORTCUT){ - forward_shortcut_layer_gpu(l, state); - } + l.forward_gpu(l, state); state.input = l.output_gpu; } } @@ -115,47 +72,7 @@ void backward_network_gpu(network net, network_state state) state.input = prev.output_gpu; state.delta = prev.delta_gpu; } - if(l.type == CONVOLUTIONAL){ - backward_convolutional_layer_gpu(l, state); - } else if(l.type == DECONVOLUTIONAL){ - backward_deconvolutional_layer_gpu(l, state); - } else if(l.type == ACTIVE){ - backward_activation_layer_gpu(l, state); - } else if(l.type == LOCAL){ - backward_local_layer_gpu(l, state); - } else if(l.type == MAXPOOL){ - if(i != 0) backward_maxpool_layer_gpu(l, state); - } else if(l.type == REORG){ - backward_reorg_layer_gpu(l, state); - } else if(l.type == AVGPOOL){ - if(i != 0) backward_avgpool_layer_gpu(l, state); - } else if(l.type == DROPOUT){ - backward_dropout_layer_gpu(l, state); - } else if(l.type == DETECTION){ - backward_detection_layer_gpu(l, state); - } else if(l.type == REGION){ - backward_region_layer_gpu(l, state); - } else if(l.type == NORMALIZATION){ - backward_normalization_layer_gpu(l, state); - } else if(l.type == BATCHNORM){ - backward_batchnorm_layer_gpu(l, state); - } else if(l.type == SOFTMAX){ - if(i != 0) backward_softmax_layer_gpu(l, state); - } else if(l.type == CONNECTED){ - backward_connected_layer_gpu(l, state); - } else if(l.type == RNN){ - backward_rnn_layer_gpu(l, state); - } else if(l.type == GRU){ - backward_gru_layer_gpu(l, state); - } else if(l.type == CRNN){ - backward_crnn_layer_gpu(l, state); - } else if(l.type == COST){ - backward_cost_layer_gpu(l, state); - } else if(l.type == ROUTE){ - backward_route_layer_gpu(l, net); - } else if(l.type == SHORTCUT){ - backward_shortcut_layer_gpu(l, state); - } + l.backward_gpu(l, state); } } @@ -166,20 +83,8 @@ void update_network_gpu(network net) float rate = get_current_rate(net); for(i = 0; i < net.n; ++i){ layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ - update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == DECONVOLUTIONAL){ - update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay); - } else if(l.type == CONNECTED){ - update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == GRU){ - update_gru_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == RNN){ - update_rnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == CRNN){ - update_crnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == LOCAL){ - update_local_layer_gpu(l, update_batch, rate, net.momentum, net.decay); + if(l.update_gpu){ + l.update_gpu(l, update_batch, rate, net.momentum, net.decay); } } } @@ -271,20 +176,8 @@ void update_layer(layer l, network net) { int update_batch = net.batch*net.subdivisions; float rate = get_current_rate(net); - if(l.type == CONVOLUTIONAL){ - update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == DECONVOLUTIONAL){ - update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay); - } else if(l.type == CONNECTED){ - update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == RNN){ - update_rnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == GRU){ - update_gru_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == CRNN){ - update_crnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay); - } else if(l.type == LOCAL){ - update_local_layer_gpu(l, update_batch, rate, net.momentum, net.decay); + if(l.update_gpu){ + l.update_gpu(l, update_batch, rate, net.momentum, net.decay); } } @@ -463,7 +356,7 @@ float train_networks(network *nets, int n, data d, int interval) } for(i = 0; i < n; ++i){ pthread_join(threads[i], 0); - printf("%f\n", errors[i]); + //printf("%f\n", errors[i]); sum += errors[i]; } if (get_current_batch(nets[0]) % interval == 0) { @@ -492,6 +385,7 @@ float *get_network_output_gpu(network net) float *network_predict_gpu(network net, float *input) { + cuda_set_device(net.gpu_index); int size = get_network_input_size(net) * net.batch; network_state state; state.index = 0; diff --git a/src/normalization_layer.c b/src/normalization_layer.c index 0551337a..069a0792 100644 --- a/src/normalization_layer.c +++ b/src/normalization_layer.c @@ -21,7 +21,13 @@ layer make_normalization_layer(int batch, int w, int h, int c, int size, float a layer.norms = calloc(h * w * c * batch, sizeof(float)); layer.inputs = w*h*c; layer.outputs = layer.inputs; + + layer.forward = forward_normalization_layer; + layer.backward = backward_normalization_layer; #ifdef GPU + layer.forward_gpu = forward_normalization_layer_gpu; + layer.backward_gpu = backward_normalization_layer_gpu; + layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); diff --git a/src/parser.c b/src/parser.c index 2b285b51..a27d2459 100644 --- a/src/parser.c +++ b/src/parser.c @@ -12,7 +12,6 @@ #include "activation_layer.h" #include "normalization_layer.h" #include "batchnorm_layer.h" -#include "deconvolutional_layer.h" #include "connected_layer.h" #include "rnn_layer.h" #include "gru_layer.h" @@ -36,30 +35,42 @@ typedef struct{ list *options; }section; -int is_network(section *s); -int is_convolutional(section *s); -int is_activation(section *s); -int is_local(section *s); -int is_deconvolutional(section *s); -int is_connected(section *s); -int is_rnn(section *s); -int is_gru(section *s); -int is_crnn(section *s); -int is_maxpool(section *s); -int is_reorg(section *s); -int is_avgpool(section *s); -int is_dropout(section *s); -int is_softmax(section *s); -int is_normalization(section *s); -int is_batchnorm(section *s); -int is_crop(section *s); -int is_shortcut(section *s); -int is_cost(section *s); -int is_detection(section *s); -int is_region(section *s); -int is_route(section *s); list *read_cfg(char *filename); +LAYER_TYPE string_to_layer_type(char * type) +{ + + if (strcmp(type, "[shortcut]")==0) return SHORTCUT; + if (strcmp(type, "[crop]")==0) return CROP; + if (strcmp(type, "[cost]")==0) return COST; + if (strcmp(type, "[detection]")==0) return DETECTION; + if (strcmp(type, "[region]")==0) return REGION; + if (strcmp(type, "[local]")==0) return LOCAL; + if (strcmp(type, "[conv]")==0 + || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; + if (strcmp(type, "[activation]")==0) return ACTIVE; + if (strcmp(type, "[net]")==0 + || strcmp(type, "[network]")==0) return NETWORK; + if (strcmp(type, "[crnn]")==0) return CRNN; + if (strcmp(type, "[gru]")==0) return GRU; + if (strcmp(type, "[rnn]")==0) return RNN; + if (strcmp(type, "[conn]")==0 + || strcmp(type, "[connected]")==0) return CONNECTED; + if (strcmp(type, "[max]")==0 + || strcmp(type, "[maxpool]")==0) return MAXPOOL; + if (strcmp(type, "[reorg]")==0) return REORG; + if (strcmp(type, "[avg]")==0 + || strcmp(type, "[avgpool]")==0) return AVGPOOL; + if (strcmp(type, "[dropout]")==0) return DROPOUT; + if (strcmp(type, "[lrn]")==0 + || strcmp(type, "[normalization]")==0) return NORMALIZATION; + if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; + if (strcmp(type, "[soft]")==0 + || strcmp(type, "[softmax]")==0) return SOFTMAX; + if (strcmp(type, "[route]")==0) return ROUTE; + return BLANK; +} + void free_section(section *s) { free(s->type); @@ -102,26 +113,6 @@ typedef struct size_params{ int time_steps; } size_params; -deconvolutional_layer parse_deconvolutional(list *options, size_params params) -{ - int n = option_find_int(options, "filters",1); - int size = option_find_int(options, "size",1); - int stride = option_find_int(options, "stride",1); - char *activation_s = option_find_str(options, "activation", "logistic"); - ACTIVATION activation = get_activation(activation_s); - - int batch,h,w,c; - h = params.h; - w = params.w; - c = params.c; - batch=params.batch; - if(!(h && w && c)) error("Layer before deconvolutional layer must output image."); - - deconvolutional_layer layer = make_deconvolutional_layer(batch,h,w,c,n,size,stride,activation); - - return layer; -} - local_layer parse_local(list *options, size_params params) { int n = option_find_int(options, "filters",1); @@ -545,6 +536,12 @@ void parse_net_options(list *options, network *net) net->max_batches = option_find_int(options, "max_batches", 0); } +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} + network parse_network_cfg(char *filename) { list *sections = read_cfg(filename); @@ -576,47 +573,46 @@ network parse_network_cfg(char *filename) s = (section *)n->val; options = s->options; layer l = {0}; - if(is_convolutional(s)){ + LAYER_TYPE lt = string_to_layer_type(s->type); + if(lt == CONVOLUTIONAL){ l = parse_convolutional(options, params); - }else if(is_local(s)){ + }else if(lt == LOCAL){ l = parse_local(options, params); - }else if(is_activation(s)){ + }else if(lt == ACTIVE){ l = parse_activation(options, params); - }else if(is_deconvolutional(s)){ - l = parse_deconvolutional(options, params); - }else if(is_rnn(s)){ + }else if(lt == RNN){ l = parse_rnn(options, params); - }else if(is_gru(s)){ + }else if(lt == GRU){ l = parse_gru(options, params); - }else if(is_crnn(s)){ + }else if(lt == CRNN){ l = parse_crnn(options, params); - }else if(is_connected(s)){ + }else if(lt == CONNECTED){ l = parse_connected(options, params); - }else if(is_crop(s)){ + }else if(lt == CROP){ l = parse_crop(options, params); - }else if(is_cost(s)){ + }else if(lt == COST){ l = parse_cost(options, params); - }else if(is_region(s)){ + }else if(lt == REGION){ l = parse_region(options, params); - }else if(is_detection(s)){ + }else if(lt == DETECTION){ l = parse_detection(options, params); - }else if(is_softmax(s)){ + }else if(lt == SOFTMAX){ l = parse_softmax(options, params); - }else if(is_normalization(s)){ + }else if(lt == NORMALIZATION){ l = parse_normalization(options, params); - }else if(is_batchnorm(s)){ + }else if(lt == BATCHNORM){ l = parse_batchnorm(options, params); - }else if(is_maxpool(s)){ + }else if(lt == MAXPOOL){ l = parse_maxpool(options, params); - }else if(is_reorg(s)){ + }else if(lt == REORG){ l = parse_reorg(options, params); - }else if(is_avgpool(s)){ + }else if(lt == AVGPOOL){ l = parse_avgpool(options, params); - }else if(is_route(s)){ + }else if(lt == ROUTE){ l = parse_route(options, params, net); - }else if(is_shortcut(s)){ + }else if(lt == SHORTCUT){ l = parse_shortcut(options, params, net); - }else if(is_dropout(s)){ + }else if(lt == DROPOUT){ l = parse_dropout(options, params); l.output = net.layers[count-1].output; l.delta = net.layers[count-1].delta; @@ -660,142 +656,6 @@ network parse_network_cfg(char *filename) return net; } -LAYER_TYPE string_to_layer_type(char * type) -{ - - if (strcmp(type, "[shortcut]")==0) return SHORTCUT; - if (strcmp(type, "[crop]")==0) return CROP; - if (strcmp(type, "[cost]")==0) return COST; - if (strcmp(type, "[detection]")==0) return DETECTION; - if (strcmp(type, "[region]")==0) return REGION; - if (strcmp(type, "[local]")==0) return LOCAL; - if (strcmp(type, "[deconv]")==0 - || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL; - if (strcmp(type, "[conv]")==0 - || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; - if (strcmp(type, "[activation]")==0) return ACTIVE; - if (strcmp(type, "[net]")==0 - || strcmp(type, "[network]")==0) return NETWORK; - if (strcmp(type, "[crnn]")==0) return CRNN; - if (strcmp(type, "[gru]")==0) return GRU; - if (strcmp(type, "[rnn]")==0) return RNN; - if (strcmp(type, "[conn]")==0 - || strcmp(type, "[connected]")==0) return CONNECTED; - if (strcmp(type, "[max]")==0 - || strcmp(type, "[maxpool]")==0) return MAXPOOL; - if (strcmp(type, "[reorg]")==0) return REORG; - if (strcmp(type, "[avg]")==0 - || strcmp(type, "[avgpool]")==0) return AVGPOOL; - if (strcmp(type, "[dropout]")==0) return DROPOUT; - if (strcmp(type, "[lrn]")==0 - || strcmp(type, "[normalization]")==0) return NORMALIZATION; - if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; - if (strcmp(type, "[soft]")==0 - || strcmp(type, "[softmax]")==0) return SOFTMAX; - if (strcmp(type, "[route]")==0) return ROUTE; - return BLANK; -} - -int is_shortcut(section *s) -{ - return (strcmp(s->type, "[shortcut]")==0); -} -int is_crop(section *s) -{ - return (strcmp(s->type, "[crop]")==0); -} -int is_cost(section *s) -{ - return (strcmp(s->type, "[cost]")==0); -} -int is_region(section *s) -{ - return (strcmp(s->type, "[region]")==0); -} -int is_detection(section *s) -{ - return (strcmp(s->type, "[detection]")==0); -} -int is_local(section *s) -{ - return (strcmp(s->type, "[local]")==0); -} -int is_deconvolutional(section *s) -{ - return (strcmp(s->type, "[deconv]")==0 - || strcmp(s->type, "[deconvolutional]")==0); -} -int is_convolutional(section *s) -{ - return (strcmp(s->type, "[conv]")==0 - || strcmp(s->type, "[convolutional]")==0); -} -int is_activation(section *s) -{ - return (strcmp(s->type, "[activation]")==0); -} -int is_network(section *s) -{ - return (strcmp(s->type, "[net]")==0 - || strcmp(s->type, "[network]")==0); -} -int is_crnn(section *s) -{ - return (strcmp(s->type, "[crnn]")==0); -} -int is_gru(section *s) -{ - return (strcmp(s->type, "[gru]")==0); -} -int is_rnn(section *s) -{ - return (strcmp(s->type, "[rnn]")==0); -} -int is_connected(section *s) -{ - return (strcmp(s->type, "[conn]")==0 - || strcmp(s->type, "[connected]")==0); -} -int is_reorg(section *s) -{ - return (strcmp(s->type, "[reorg]")==0); -} -int is_maxpool(section *s) -{ - return (strcmp(s->type, "[max]")==0 - || strcmp(s->type, "[maxpool]")==0); -} -int is_avgpool(section *s) -{ - return (strcmp(s->type, "[avg]")==0 - || strcmp(s->type, "[avgpool]")==0); -} -int is_dropout(section *s) -{ - return (strcmp(s->type, "[dropout]")==0); -} - -int is_normalization(section *s) -{ - return (strcmp(s->type, "[lrn]")==0 - || strcmp(s->type, "[normalization]")==0); -} - -int is_batchnorm(section *s) -{ - return (strcmp(s->type, "[batchnorm]")==0); -} - -int is_softmax(section *s) -{ - return (strcmp(s->type, "[soft]")==0 - || strcmp(s->type, "[softmax]")==0); -} -int is_route(section *s) -{ - return (strcmp(s->type, "[route]")==0); -} - list *read_cfg(char *filename) { FILE *file = fopen(filename, "r"); @@ -831,45 +691,6 @@ list *read_cfg(char *filename) return sections; } -void save_weights_double(network net, char *filename) -{ - fprintf(stderr, "Saving doubled weights to %s\n", filename); - FILE *fp = fopen(filename, "w"); - if(!fp) file_error(filename); - - fwrite(&net.learning_rate, sizeof(float), 1, fp); - fwrite(&net.momentum, sizeof(float), 1, fp); - fwrite(&net.decay, sizeof(float), 1, fp); - fwrite(net.seen, sizeof(int), 1, fp); - - int i,j,k; - for(i = 0; i < net.n; ++i){ - layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ -#ifdef GPU - if(gpu_index >= 0){ - pull_convolutional_layer(l); - } -#endif - float zero = 0; - fwrite(l.biases, sizeof(float), l.n, fp); - fwrite(l.biases, sizeof(float), l.n, fp); - - for (j = 0; j < l.n; ++j){ - int index = j*l.c*l.size*l.size; - fwrite(l.weights+index, sizeof(float), l.c*l.size*l.size, fp); - for (k = 0; k < l.c*l.size*l.size; ++k) fwrite(&zero, sizeof(float), 1, fp); - } - for (j = 0; j < l.n; ++j){ - int index = j*l.c*l.size*l.size; - for (k = 0; k < l.c*l.size*l.size; ++k) fwrite(&zero, sizeof(float), 1, fp); - fwrite(l.weights+index, sizeof(float), l.c*l.size*l.size, fp); - } - } - } - fclose(fp); -} - void save_convolutional_weights_binary(layer l, FILE *fp) { #ifdef GPU @@ -1147,16 +968,6 @@ void load_weights_upto(network *net, char *filename, int cutoff) if(l.type == CONVOLUTIONAL){ load_convolutional_weights(l, fp); } - if(l.type == DECONVOLUTIONAL){ - int num = l.n*l.c*l.size*l.size; - fread(l.biases, sizeof(float), l.n, fp); - fread(l.weights, sizeof(float), num, fp); -#ifdef GPU - if(gpu_index >= 0){ - push_deconvolutional_layer(l); - } -#endif - } if(l.type == CONNECTED){ load_connected_weights(l, fp, transpose); } diff --git a/src/region_layer.c b/src/region_layer.c index 24d31690..bc3acaae 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -34,7 +34,11 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int l.biases[i] = .5; } + l.forward = forward_region_layer; + l.backward = backward_region_layer; #ifdef GPU + l.forward_gpu = forward_region_layer_gpu; + l.backward_gpu = backward_region_layer_gpu; l.output_gpu = cuda_make_array(l.output, batch*l.outputs); l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); #endif @@ -228,6 +232,45 @@ void backward_region_layer(const region_layer l, network_state state) axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); } +void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness) +{ + int i,j,n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int index = i*l.n + n; + int p_index = index * (l.classes + 5) + 4; + float scale = predictions[p_index]; + int box_index = index * (l.classes + 5); + boxes[index].x = (predictions[box_index + 0] + col + .5) / l.w * w; + boxes[index].y = (predictions[box_index + 1] + row + .5) / l.h * h; + if(0){ + boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / l.w * w; + boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / l.h * h; + } + boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (l.sqrt?2:1)) * w; + boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (l.sqrt?2:1)) * h; + if(1){ + boxes[index].x = ((col + .5)/l.w + predictions[box_index + 0] * .5) * w; + boxes[index].y = ((row + .5)/l.h + predictions[box_index + 1] * .5) * h; + boxes[index].w = (exp(predictions[box_index + 2]) * .5) * w; + boxes[index].h = (exp(predictions[box_index + 3]) * .5) * h; + } + for(j = 0; j < l.classes; ++j){ + int class_index = index * (l.classes + 5) + 5; + float prob = scale*predictions[class_index+j]; + probs[index][j] = (prob > thresh) ? prob : 0; + } + if(only_objectness){ + probs[index][0] = scale; + } + } + } +} + #ifdef GPU void forward_region_layer_gpu(const region_layer l, network_state state) diff --git a/src/region_layer.h b/src/region_layer.h index a4156fd0..01901e07 100644 --- a/src/region_layer.h +++ b/src/region_layer.h @@ -9,6 +9,7 @@ typedef layer region_layer; region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords); void forward_region_layer(const region_layer l, network_state state); void backward_region_layer(const region_layer l, network_state state); +void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); #ifdef GPU void forward_region_layer_gpu(const region_layer l, network_state state); diff --git a/src/reorg_layer.c b/src/reorg_layer.c index 55b425f1..5bc257a3 100644 --- a/src/reorg_layer.c +++ b/src/reorg_layer.c @@ -22,7 +22,13 @@ layer make_reorg_layer(int batch, int h, int w, int c, int stride) int output_size = l.out_h * l.out_w * l.out_c * batch; l.output = calloc(output_size, sizeof(float)); l.delta = calloc(output_size, sizeof(float)); + + l.forward = forward_reorg_layer; + l.backward = backward_reorg_layer; #ifdef GPU + l.forward_gpu = forward_reorg_layer_gpu; + l.backward_gpu = backward_reorg_layer_gpu; + l.output_gpu = cuda_make_array(l.output, output_size); l.delta_gpu = cuda_make_array(l.delta, output_size); #endif diff --git a/src/rnn_layer.c b/src/rnn_layer.c index b713899c..83fda13e 100644 --- a/src/rnn_layer.c +++ b/src/rnn_layer.c @@ -58,7 +58,13 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, l.output = l.output_layer->output; l.delta = l.output_layer->delta; + l.forward = forward_rnn_layer; + l.backward = backward_rnn_layer; + l.update = update_rnn_layer; #ifdef GPU + l.forward_gpu = forward_rnn_layer_gpu; + l.backward_gpu = backward_rnn_layer_gpu; + l.update_gpu = update_rnn_layer_gpu; l.state_gpu = cuda_make_array(l.state, batch*hidden*(steps+1)); l.output_gpu = l.output_layer->output_gpu; l.delta_gpu = l.output_layer->delta_gpu; diff --git a/src/rnn_layer.h b/src/rnn_layer.h index 9e19cee1..bb9478b9 100644 --- a/src/rnn_layer.h +++ b/src/rnn_layer.h @@ -1,23 +1,24 @@ -#ifndef GRU_LAYER_H -#define GRU_LAYER_H +#ifndef RNN_LAYER_H +#define RNN_LAYER_H #include "activations.h" #include "layer.h" #include "network.h" +#define USET -layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize); +layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log); -void forward_gru_layer(layer l, network_state state); -void backward_gru_layer(layer l, network_state state); -void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay); +void forward_rnn_layer(layer l, network_state state); +void backward_rnn_layer(layer l, network_state state); +void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay); #ifdef GPU -void forward_gru_layer_gpu(layer l, network_state state); -void backward_gru_layer_gpu(layer l, network_state state); -void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); -void push_gru_layer(layer l); -void pull_gru_layer(layer l); +void forward_rnn_layer_gpu(layer l, network_state state); +void backward_rnn_layer_gpu(layer l, network_state state); +void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); +void push_rnn_layer(layer l); +void pull_rnn_layer(layer l); #endif #endif diff --git a/src/rnn_vid.c b/src/rnn_vid.c index bf024f9c..36912d6b 100644 --- a/src/rnn_vid.c +++ b/src/rnn_vid.c @@ -6,6 +6,8 @@ #ifdef OPENCV #include "opencv2/highgui/highgui_c.h" +image get_image_from_stream(CvCapture *cap); +image ipl_to_image(IplImage* src); void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); diff --git a/src/route_layer.c b/src/route_layer.c index df50b64f..47e3d703 100644 --- a/src/route_layer.c +++ b/src/route_layer.c @@ -23,20 +23,26 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz l.inputs = outputs; l.delta = calloc(outputs*batch, sizeof(float)); l.output = calloc(outputs*batch, sizeof(float));; + + l.forward = forward_route_layer; + l.backward = backward_route_layer; #ifdef GPU + l.forward_gpu = forward_route_layer_gpu; + l.backward_gpu = backward_route_layer_gpu; + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); l.output_gpu = cuda_make_array(l.output, outputs*batch); #endif return l; } -void forward_route_layer(const route_layer l, network net) +void forward_route_layer(const route_layer l, network_state state) { int i, j; int offset = 0; for(i = 0; i < l.n; ++i){ int index = l.input_layers[i]; - float *input = net.layers[index].output; + float *input = state.net.layers[index].output; int input_size = l.input_sizes[i]; for(j = 0; j < l.batch; ++j){ copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); @@ -45,13 +51,13 @@ void forward_route_layer(const route_layer l, network net) } } -void backward_route_layer(const route_layer l, network net) +void backward_route_layer(const route_layer l, network_state state) { int i, j; int offset = 0; for(i = 0; i < l.n; ++i){ int index = l.input_layers[i]; - float *delta = net.layers[index].delta; + float *delta = state.net.layers[index].delta; int input_size = l.input_sizes[i]; for(j = 0; j < l.batch; ++j){ axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); @@ -61,13 +67,13 @@ void backward_route_layer(const route_layer l, network net) } #ifdef GPU -void forward_route_layer_gpu(const route_layer l, network net) +void forward_route_layer_gpu(const route_layer l, network_state state) { int i, j; int offset = 0; for(i = 0; i < l.n; ++i){ int index = l.input_layers[i]; - float *input = net.layers[index].output_gpu; + float *input = state.net.layers[index].output_gpu; int input_size = l.input_sizes[i]; for(j = 0; j < l.batch; ++j){ copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); @@ -76,13 +82,13 @@ void forward_route_layer_gpu(const route_layer l, network net) } } -void backward_route_layer_gpu(const route_layer l, network net) +void backward_route_layer_gpu(const route_layer l, network_state state) { int i, j; int offset = 0; for(i = 0; i < l.n; ++i){ int index = l.input_layers[i]; - float *delta = net.layers[index].delta_gpu; + float *delta = state.net.layers[index].delta_gpu; int input_size = l.input_sizes[i]; for(j = 0; j < l.batch; ++j){ axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); diff --git a/src/route_layer.h b/src/route_layer.h index 1f0d6e32..77245a63 100644 --- a/src/route_layer.h +++ b/src/route_layer.h @@ -6,12 +6,12 @@ typedef layer route_layer; route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); -void forward_route_layer(const route_layer l, network net); -void backward_route_layer(const route_layer l, network net); +void forward_route_layer(const route_layer l, network_state state); +void backward_route_layer(const route_layer l, network_state state); #ifdef GPU -void forward_route_layer_gpu(const route_layer l, network net); -void backward_route_layer_gpu(const route_layer l, network net); +void forward_route_layer_gpu(const route_layer l, network_state state); +void backward_route_layer_gpu(const route_layer l, network_state state); #endif #endif diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index bf455162..8bca50fb 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -23,7 +23,13 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.delta = calloc(l.outputs*batch, sizeof(float)); l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_shortcut_layer; + l.backward = backward_shortcut_layer; #ifdef GPU + l.forward_gpu = forward_shortcut_layer_gpu; + l.backward_gpu = backward_shortcut_layer_gpu; + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); l.output_gpu = cuda_make_array(l.output, l.outputs*batch); #endif diff --git a/src/softmax_layer.c b/src/softmax_layer.c index e189701f..20bc07f3 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -19,7 +19,13 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups) l.outputs = inputs; l.output = calloc(inputs*batch, sizeof(float)); l.delta = calloc(inputs*batch, sizeof(float)); + + l.forward = forward_softmax_layer; + l.backward = backward_softmax_layer; #ifdef GPU + l.forward_gpu = forward_softmax_layer_gpu; + l.backward_gpu = backward_softmax_layer_gpu; + l.output_gpu = cuda_make_array(l.output, inputs*batch); l.delta_gpu = cuda_make_array(l.delta, inputs*batch); #endif diff --git a/src/utils.c b/src/utils.c index 55f64b8c..e8128b91 100644 --- a/src/utils.c +++ b/src/utils.c @@ -135,23 +135,20 @@ void pm(int M, int N, float *A) printf("\n"); } -char *find_replace(char *str, char *orig, char *rep) +void find_replace(char *str, char *orig, char *rep, char *output) { - static char buffer[4096]; - static char buffer2[4096]; - static char buffer3[4096]; + char buffer[4096] = {0}; char *p; - if(!(p = strstr(str, orig))) // Is 'orig' even in 'str'? - return str; + sprintf(buffer, "%s", str); + if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? + sprintf(output, "%s", str); + return; + } - strncpy(buffer2, str, p-str); // Copy characters from 'str' start to 'orig' st$ - buffer2[p-str] = '\0'; + *p = '\0'; - sprintf(buffer3, "%s%s%s", buffer2, rep, p+strlen(orig)); - sprintf(buffer, "%s", buffer3); - - return buffer; + sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); } float sec(clock_t clocks) diff --git a/src/utils.h b/src/utils.h index 185e5e31..46676344 100644 --- a/src/utils.h +++ b/src/utils.h @@ -19,7 +19,7 @@ void read_all(int fd, char *buffer, size_t bytes); void write_all(int fd, char *buffer, size_t bytes); int read_all_fail(int fd, char *buffer, size_t bytes); int write_all_fail(int fd, char *buffer, size_t bytes); -char *find_replace(char *str, char *orig, char *rep); +void find_replace(char *str, char *orig, char *rep, char *output); void error(const char *s); void malloc_error(); void file_error(char *s); diff --git a/src/voxel.c b/src/voxel.c index c277bcf2..1b53880c 100644 --- a/src/voxel.c +++ b/src/voxel.c @@ -5,6 +5,7 @@ #ifdef OPENCV #include "opencv2/highgui/highgui_c.h" +image get_image_from_stream(CvCapture *cap); #endif void extract_voxel(char *lfile, char *rfile, char *prefix) diff --git a/src/xnor_layer.c b/src/xnor_layer.c deleted file mode 100644 index e2fca7e8..00000000 --- a/src/xnor_layer.c +++ /dev/null @@ -1,86 +0,0 @@ -#include "xnor_layer.h" -#include "binary_convolution.h" -#include "convolutional_layer.h" - -layer make_xnor_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize) -{ - int i; - layer l = {0}; - l.type = XNOR; - - l.h = h; - l.w = w; - l.c = c; - l.n = n; - l.batch = batch; - l.stride = stride; - l.size = size; - l.pad = pad; - l.batch_normalize = batch_normalize; - - l.filters = calloc(c*n*size*size, sizeof(float)); - l.biases = calloc(n, sizeof(float)); - - int out_h = convolutional_out_height(l); - int out_w = convolutional_out_width(l); - l.out_h = out_h; - l.out_w = out_w; - l.out_c = n; - l.outputs = l.out_h * l.out_w * l.out_c; - l.inputs = l.w * l.h * l.c; - - l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); - - if(batch_normalize){ - l.scales = calloc(n, sizeof(float)); - for(i = 0; i < n; ++i){ - l.scales[i] = 1; - } - - l.mean = calloc(n, sizeof(float)); - l.variance = calloc(n, sizeof(float)); - - l.rolling_mean = calloc(n, sizeof(float)); - l.rolling_variance = calloc(n, sizeof(float)); - } - - l.activation = activation; - - fprintf(stderr, "XNOR Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); - - return l; -} - -void forward_xnor_layer(const layer l, network_state state) -{ - int b = l.n; - int c = l.c; - int ix = l.w; - int iy = l.h; - int wx = l.size; - int wy = l.size; - int s = l.stride; - int pad = l.pad * (l.size/2); - - // MANDATORY: Make the binary layer - ai2_bin_conv_layer al = ai2_make_bin_conv_layer(b, c, ix, iy, wx, wy, s, pad); - - // OPTIONAL: You need to set the real-valued input like: - ai2_setFltInput_unpadded(&al, state.input); - // The above function will automatically binarize the input for the layer (channel wise). - // If commented: using the default 0-valued input. - - ai2_setFltWeights(&al, l.filters); - // The above function will automatically binarize the input for the layer (channel wise). - // If commented: using the default 0-valued weights. - - // MANDATORY: Call forward - ai2_bin_forward(&al); - - // OPTIONAL: Inspect outputs - float *output = ai2_getFltOutput(&al); // output is of size l.px * l.py where px and py are the padded outputs - - memcpy(l.output, output, l.outputs*sizeof(float)); - // MANDATORY: Free layer - ai2_free_bin_conv_layer(&al); -} diff --git a/src/xnor_layer.h b/src/xnor_layer.h deleted file mode 100644 index f1c5b687..00000000 --- a/src/xnor_layer.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef XNOR_LAYER_H -#define XNOR_LAYER_H - -#include "layer.h" -#include "network.h" - -layer make_xnor_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalization); -void forward_xnor_layer(const layer l, network_state state); - -#endif - diff --git a/src/yolo.c b/src/yolo.c index 2465a2cd..82faffd0 100644 --- a/src/yolo.c +++ b/src/yolo.c @@ -11,7 +11,6 @@ #endif char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; -image voc_labels[20]; void train_yolo(char *cfgfile, char *weightfile) { @@ -88,34 +87,6 @@ void train_yolo(char *cfgfile, char *weightfile) save_weights(net, buff); } -void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness) -{ - int i,j,n; - //int per_cell = 5*num+classes; - for (i = 0; i < side*side; ++i){ - int row = i / side; - int col = i % side; - for(n = 0; n < num; ++n){ - int index = i*num + n; - int p_index = side*side*classes + i*num + n; - float scale = predictions[p_index]; - int box_index = side*side*(classes + num) + (i*num + n)*4; - boxes[index].x = (predictions[box_index + 0] + col) / side * w; - boxes[index].y = (predictions[box_index + 1] + row) / side * h; - boxes[index].w = pow(predictions[box_index + 2], (square?2:1)) * w; - boxes[index].h = pow(predictions[box_index + 3], (square?2:1)) * h; - for(j = 0; j < classes; ++j){ - int class_index = i*classes; - float prob = scale*predictions[class_index+j]; - probs[index][j] = (prob > thresh) ? prob : 0; - } - if(only_objectness){ - probs[index][0] = scale; - } - } - } -} - void print_yolo_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h) { int i, j; @@ -155,8 +126,6 @@ void validate_yolo(char *cfgfile, char *weightfile) layer l = net.layers[net.n-1]; int classes = l.classes; - int square = l.sqrt; - int side = l.side; int j; FILE **fps = calloc(classes, sizeof(FILE *)); @@ -165,9 +134,9 @@ void validate_yolo(char *cfgfile, char *weightfile) snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); fps[j] = fopen(buff, "w"); } - box *boxes = calloc(side*side*l.n, sizeof(box)); - float **probs = calloc(side*side*l.n, sizeof(float *)); - for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); + box *boxes = calloc(l.side*l.side*l.n, sizeof(box)); + float **probs = calloc(l.side*l.side*l.n, sizeof(float *)); + for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *)); int m = plist->size; int i=0; @@ -213,12 +182,12 @@ void validate_yolo(char *cfgfile, char *weightfile) char *path = paths[i+t-nthreads]; char *id = basecfg(path); float *X = val_resized[t].data; - float *predictions = network_predict(net, X); + network_predict(net, X); int w = val[t].w; int h = val[t].h; - convert_detections(predictions, classes, l.n, square, side, w, h, thresh, probs, boxes, 0); - if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, iou_thresh); - print_yolo_detections(fps, id, boxes, probs, side*side*l.n, classes, w, h); + get_detection_boxes(l, w, h, thresh, probs, boxes, 0); + if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, classes, iou_thresh); + print_yolo_detections(fps, id, boxes, probs, l.side*l.side*l.n, classes, w, h); free(id); free_image(val[t]); free_image(val_resized[t]); @@ -243,7 +212,6 @@ void validate_yolo_recall(char *cfgfile, char *weightfile) layer l = net.layers[net.n-1]; int classes = l.classes; - int square = l.sqrt; int side = l.side; int j, k; @@ -274,14 +242,15 @@ void validate_yolo_recall(char *cfgfile, char *weightfile) image orig = load_image_color(path, 0, 0); image sized = resize_image(orig, net.w, net.h); char *id = basecfg(path); - float *predictions = network_predict(net, sized.data); - convert_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1); + network_predict(net, sized.data); + get_detection_boxes(l, orig.w, orig.h, thresh, probs, boxes, 1); if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms); - char *labelpath = find_replace(path, "images", "labels"); - labelpath = find_replace(labelpath, "JPEGImages", "labels"); - labelpath = find_replace(labelpath, ".jpg", ".txt"); - labelpath = find_replace(labelpath, ".JPEG", ".txt"); + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); @@ -315,7 +284,7 @@ void validate_yolo_recall(char *cfgfile, char *weightfile) void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) { - + image *alphabet = load_alphabet(); network net = parse_network_cfg(cfgfile); if(weightfile){ load_weights(&net, weightfile); @@ -345,12 +314,12 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) image sized = resize_image(im, net.w, net.h); float *X = sized.data; time=clock(); - float *predictions = network_predict(net, X); + network_predict(net, X); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0); + get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1); if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms); - //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20); - draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20); + //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20); + draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20); save_image(im, "predictions"); show_image(im, "predictions"); @@ -366,13 +335,7 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) void run_yolo(int argc, char **argv) { - int i; - for(i = 0; i < 20; ++i){ - char buff[256]; - sprintf(buff, "data/labels/%s.png", voc_names[i]); - voc_labels[i] = load_image_color(buff, 0, 0); - } - + char *prefix = find_char_arg(argc, argv, "-prefix", 0); float thresh = find_float_arg(argc, argv, "-thresh", .2); int cam_index = find_int_arg(argc, argv, "-c", 0); int frame_skip = find_int_arg(argc, argv, "-s", 0); @@ -388,5 +351,5 @@ void run_yolo(int argc, char **argv) else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); - else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, voc_labels, 20, frame_skip); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix); }