diff --git a/Makefile b/Makefile index 1b6aa804..d5c75e0f 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ endif OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o ifeq ($(GPU), 1) -OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o +OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o coco_kernels.o endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) diff --git a/cfg/yolo-coco.cfg b/cfg/yolo-coco.cfg new file mode 100644 index 00000000..0c13a312 --- /dev/null +++ b/cfg/yolo-coco.cfg @@ -0,0 +1,240 @@ +[net] +batch=64 +subdivisions=4 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0001 +policy=steps +steps=100,200,300,100000,150000 +scales=2.5,2,2,.1,.1 +max_batches = 300000 + +[crop] +crop_width=448 +crop_height=448 +flip=0 +angle=0 +saturation = 1.5 +exposure = 1.5 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +################################# + + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=192 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 4410 +activation=linear + +[detection] +classes=80 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/src/activation_kernels.cu 
b/src/activation_kernels.cu index 0ab9fd96..d5607daa 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -10,6 +10,7 @@ extern "C" { __device__ float linear_activate_kernel(float x){return x;} __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));} __device__ float relu_activate_kernel(float x){return x*(x>0);} +__device__ float elu_activate_kernel(float x){return (x >= 0) ? x : exp(x)-1;} /* ELU: x for x >= 0, exp(x)-1 otherwise. Branch instead of masking: 0*(exp(x)-1) is NaN once exp(x) overflows to inf. */ __device__ float relie_activate_kernel(float x){return x*(x>0);} __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;} __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;} @@ -24,6 +25,7 @@ __device__ float plse_activate_kernel(float x) __device__ float linear_gradient_kernel(float x){return 1;} __device__ float logistic_gradient_kernel(float x){return (1-x)*x;} __device__ float relu_gradient_kernel(float x){return (x>0);} +__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;} __device__ float ramp_gradient_kernel(float x){return (x>0)+.1;} __device__ float leaky_gradient_kernel(float x){return (x>0) ? 
1 : .1;} @@ -39,6 +41,8 @@ __device__ float activate_kernel(float x, ACTIVATION a) return logistic_activate_kernel(x); case RELU: return relu_activate_kernel(x); + case ELU: + return elu_activate_kernel(x); case RELIE: return relie_activate_kernel(x); case RAMP: @@ -62,6 +66,8 @@ __device__ float gradient_kernel(float x, ACTIVATION a) return logistic_gradient_kernel(x); case RELU: return relu_gradient_kernel(x); + case ELU: + return elu_gradient_kernel(x); case RELIE: return relie_gradient_kernel(x); case RAMP: diff --git a/src/activations.c b/src/activations.c index d31b1e41..5a62ef51 100644 --- a/src/activations.c +++ b/src/activations.c @@ -12,6 +12,8 @@ char *get_activation_string(ACTIVATION a) return "logistic"; case RELU: return "relu"; + case ELU: + return "elu"; case RELIE: return "relie"; case RAMP: @@ -34,6 +36,7 @@ ACTIVATION get_activation(char *s) { if (strcmp(s, "logistic")==0) return LOGISTIC; if (strcmp(s, "relu")==0) return RELU; + if (strcmp(s, "elu")==0) return ELU; if (strcmp(s, "relie")==0) return RELIE; if (strcmp(s, "plse")==0) return PLSE; if (strcmp(s, "linear")==0) return LINEAR; @@ -53,6 +56,8 @@ float activate(float x, ACTIVATION a) return logistic_activate(x); case RELU: return relu_activate(x); + case ELU: + return elu_activate(x); case RELIE: return relie_activate(x); case RAMP: @@ -84,6 +89,8 @@ float gradient(float x, ACTIVATION a) return logistic_gradient(x); case RELU: return relu_gradient(x); + case ELU: + return elu_gradient(x); case RELIE: return relie_gradient(x); case RAMP: diff --git a/src/activations.h b/src/activations.h index 22a713af..d824d1e7 100644 --- a/src/activations.h +++ b/src/activations.h @@ -4,7 +4,7 @@ #include "math.h" typedef enum{ - LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU }ACTIVATION; ACTIVATION get_activation(char *s); @@ -22,6 +22,7 @@ void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta); static inline float 
linear_activate(float x){return x;} static inline float logistic_activate(float x){return 1./(1. + exp(-x));} static inline float relu_activate(float x){return x*(x>0);} +static inline float elu_activate(float x){return (x >= 0) ? x : exp(x)-1;} /* ELU: x for x >= 0, exp(x)-1 otherwise. Branch instead of masking: (x < 0)*(exp(x)-1) is 0*inf = NaN once exp(x) overflows, and (x >= 0)*x is NaN for x = -inf. */ static inline float relie_activate(float x){return x*(x>0);} static inline float ramp_activate(float x){return x*(x>0)+.1*x;} static inline float leaky_activate(float x){return (x>0) ? x : .1*x;} @@ -36,6 +37,7 @@ static inline float plse_activate(float x) static inline float linear_gradient(float x){return 1;} static inline float logistic_gradient(float x){return (1-x)*x;} static inline float relu_gradient(float x){return (x>0);} +static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} static inline float ramp_gradient(float x){return (x>0)+.1;} static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} diff --git a/src/coco.c b/src/coco.c index cef6adea..17d06540 100644 --- a/src/coco.c +++ b/src/coco.c @@ -15,30 +15,7 @@ char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus"," int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; -void draw_coco(image im, int num, float thresh, box *boxes, float **probs) -{ - int classes = 80; - int i; - - for(i = 0; i < num; ++i){ - int class = max_index(probs[i], classes); - float prob = probs[i][class]; - if(prob > thresh){ - int width = sqrt(prob)*5 + 1; - printf("%f %s\n", prob, coco_classes[class]); - float red = get_color(0,class,classes); - float green = get_color(1,class,classes); - float blue = get_color(2,class,classes); - box b = boxes[i]; - - int left = (b.x-b.w/2.)*im.w; - int right = (b.x+b.w/2.)*im.w; - int top = (b.y-b.h/2.)*im.h; - int bot = 
(b.y+b.h/2.)*im.h; - draw_box_width(im, left, top, right, bot, width, red, green, blue); - } - } -} +image coco_labels[80]; void train_coco(char *cfgfile, char *weightfile) { @@ -368,6 +345,7 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) detection_layer l = net.layers[net.n-1]; set_batch_network(&net, 1); srand(2222222); + float nms = .4; clock_t time; char buff[256]; char *input = buff; @@ -392,7 +370,8 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) float *predictions = network_predict(net, X); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); convert_coco_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0); - draw_coco(im, l.side*l.side*l.n, thresh, boxes, probs); + if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms); + draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80); show_image(im, "predictions"); show_image(sized, "resized"); @@ -406,9 +385,23 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) } } +#ifdef OPENCV +#ifdef GPU +void demo_coco(char *cfgfile, char *weightfile, float thresh, int cam_index); +#endif +#endif + void run_coco(int argc, char **argv) { + int i; + for(i = 0; i < 80; ++i){ + char buff[256]; + sprintf(buff, "data/labels/%s.png", coco_classes[i]); + coco_labels[i] = load_image_color(buff, 0, 0); + } float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + if(argc < 4){ fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; @@ -421,4 +414,9 @@ void run_coco(int argc, char **argv) else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); +#ifdef OPENCV +#ifdef GPU + else 
if(0==strcmp(argv[2], "demo")) demo_coco(cfg, weights, thresh, cam_index); +#endif +#endif } diff --git a/src/coco_kernels.cu b/src/coco_kernels.cu index a3b4875c..2ec09152 100644 --- a/src/coco_kernels.cu +++ b/src/coco_kernels.cu @@ -10,6 +10,7 @@ extern "C" { #include "parser.h" #include "box.h" #include "image.h" +#include <sys/time.h> } #ifdef OPENCV @@ -17,7 +18,9 @@ extern "C" { #include "opencv2/imgproc/imgproc.hpp" extern "C" image ipl_to_image(IplImage* src); extern "C" void convert_coco_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); -extern "C" void draw_coco(image im, int num, float thresh, box *boxes, float **probs); + +extern "C" char *coco_classes[]; +extern "C" image coco_labels[]; /* coco.c defines this as an array (image coco_labels[80]); it must be declared as an array here too, since a pointer declaration would reinterpret the first element's bytes as an address. */ static float **probs; static box *boxes; @@ -27,9 +30,10 @@ static image in_s ; static image det ; static image det_s; static image disp ; -static cv::VideoCapture cap(0); +static cv::VideoCapture cap; +static float fps = 0; -void *fetch_in_thread(void *ptr) +void *fetch_in_thread_coco(void *ptr) { cv::Mat frame_m; cap >> frame_m; @@ -40,7 +44,7 @@ void *fetch_in_thread(void *ptr) return 0; } -void *detect_in_thread(void *ptr) +void *detect_in_thread_coco(void *ptr) { float nms = .4; float thresh = .2; @@ -53,12 +57,13 @@ void *detect_in_thread(void *ptr) if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms); printf("\033[2J"); printf("\033[1;1H"); - printf("\nObjects:\n\n"); - draw_coco(det, l.side*l.side*l.n, thresh, boxes, probs); + printf("\nFPS:%.0f\n",fps); + printf("Objects:\n\n"); + draw_detections(det, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80); return 0; } -extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh) +extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh, int cam_index) { printf("YOLO demo\n"); net = parse_network_cfg(cfgfile); @@ -69,6 +74,8 @@ extern "C" void demo_coco(char 
*cfgfile, char *weightfile, float thresh) srand(2222222); + cv::VideoCapture cam(cam_index); + cap = cam; if(!cap.isOpened()) error("Couldn't connect to webcam.\n"); detection_layer l = net.layers[net.n-1]; @@ -81,19 +88,21 @@ extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh) pthread_t fetch_thread; pthread_t detect_thread; - fetch_in_thread(0); + fetch_in_thread_coco(0); det = in; det_s = in_s; - fetch_in_thread(0); - detect_in_thread(0); + fetch_in_thread_coco(0); + detect_in_thread_coco(0); disp = det; det = in; det_s = in_s; while(1){ - if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); - if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + if(pthread_create(&fetch_thread, 0, fetch_in_thread_coco, 0)) error("Thread creation failed"); + if(pthread_create(&detect_thread, 0, detect_in_thread_coco, 0)) error("Thread creation failed"); show_image(disp, "YOLO"); free_image(disp); cvWaitKey(1); @@ -103,10 +112,15 @@ extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh) disp = det; det = in; det_s = in_s; + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/(1000000.f*tval_result.tv_sec + tval_result.tv_usec); /* frames per second for this iteration; whole seconds are included because tv_usec alone only holds the sub-second part of the elapsed time. */ + fps = .9*fps + .1*curr; } } #else -extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh){ +extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh, int cam_index){ fprintf(stderr, "YOLO-COCO demo needs OpenCV for webcam images.\n"); } #endif diff --git a/src/image.c b/src/image.c index 8497032a..a8a6684c 100644 --- a/src/image.c +++ b/src/image.c @@ -28,6 +28,26 @@ float get_color(int c, int x, int max) return r; } +void draw_label(image a, int r, int c, image label, const float *rgb) +{ + float ratio = (float) label.w / label.h; + int h = label.h; + int w = ratio * h; + image 
rl = resize_image(label, w, h); + if (r - h >= 0) r = r - h; + + int i, j, k; + for(j = 0; j < h && j + r < a.h; ++j){ + for(i = 0; i < w && i + c < a.w; ++i){ + for(k = 0; k < label.c; ++k){ + float val = get_pixel(rl, i, j, k); + set_pixel(a, i+c, j+r, k, rgb[k] * val); + } + } + } + free_image(rl); +} + void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) { //normalize_image(a); @@ -42,25 +62,25 @@ void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b if(y2 < 0) y2 = 0; if(y2 >= a.h) y2 = a.h-1; - for(i = x1; i < x2; ++i){ - a.data[i + y1*a.w + 0*a.w*a.h] = b; - a.data[i + y2*a.w + 0*a.w*a.h] = b; + for(i = x1; i <= x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = r; + a.data[i + y2*a.w + 0*a.w*a.h] = r; a.data[i + y1*a.w + 1*a.w*a.h] = g; a.data[i + y2*a.w + 1*a.w*a.h] = g; - a.data[i + y1*a.w + 2*a.w*a.h] = r; - a.data[i + y2*a.w + 2*a.w*a.h] = r; + a.data[i + y1*a.w + 2*a.w*a.h] = b; + a.data[i + y2*a.w + 2*a.w*a.h] = b; } - for(i = y1; i < y2; ++i){ - a.data[x1 + i*a.w + 0*a.w*a.h] = b; - a.data[x2 + i*a.w + 0*a.w*a.h] = b; + for(i = y1; i <= y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = r; + a.data[x2 + i*a.w + 0*a.w*a.h] = r; a.data[x1 + i*a.w + 1*a.w*a.h] = g; a.data[x2 + i*a.w + 1*a.w*a.h] = g; - a.data[x1 + i*a.w + 2*a.w*a.h] = r; - a.data[x2 + i*a.w + 2*a.w*a.h] = r; + a.data[x1 + i*a.w + 2*a.w*a.h] = b; + a.data[x2 + i*a.w + 2*a.w*a.h] = b; } } @@ -85,6 +105,43 @@ void draw_bbox(image a, box bbox, int w, float r, float g, float b) } } +void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes) +{ + int i; + + for(i = 0; i < num; ++i){ + int class = max_index(probs[i], classes); + float prob = probs[i][class]; + if(prob > thresh){ + int width = pow(prob, 1./2.)*10+1; + printf("%s: %.2f\n", names[class], prob); + int offset = class*17 % classes; + float red = get_color(0,offset,classes); + float green = get_color(1,offset,classes); + float 
blue = get_color(2,offset,classes); + float rgb[3]; + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = boxes[i]; + + int left = (b.x-b.w/2.)*im.w; + int right = (b.x+b.w/2.)*im.w; + int top = (b.y-b.h/2.)*im.h; + int bot = (b.y+b.h/2.)*im.h; + + if(left < 0) left = 0; + if(right > im.w-1) right = im.w-1; + if(top < 0) top = 0; + if(bot > im.h-1) bot = im.h-1; + + draw_box_width(im, left, top, right, bot, width, red, green, blue); + if (labels) draw_label(im, top + width, left, labels[class], rgb); + } + } +} + + void flip_image(image a) { int i,j,k; diff --git a/src/image.h b/src/image.h index 336cfa19..c3e1a78b 100644 --- a/src/image.h +++ b/src/image.h @@ -20,6 +20,8 @@ void flip_image(image a); void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); void draw_bbox(image a, box bbox, int w, float r, float g, float b); +void draw_label(image a, int r, int c, image label, const float *rgb); +void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes); image image_distance(image a, image b); void scale_image(image m, float s); image crop_image(image im, int dx, int dy, int w, int h); diff --git a/src/layer.h b/src/layer.h index 2a74437c..b3ab6271 100644 --- a/src/layer.h +++ b/src/layer.h @@ -130,11 +130,12 @@ typedef struct { float * x_gpu; float * x_norm_gpu; float * weights_gpu; - float * biases_gpu; - float * scales_gpu; - float * weight_updates_gpu; + + float * biases_gpu; float * bias_updates_gpu; + + float * scales_gpu; float * scale_updates_gpu; float * output_gpu; diff --git a/src/network.c b/src/network.c index 6c7461de..d9585c47 100644 --- a/src/network.c +++ b/src/network.c @@ -26,6 +26,17 @@ int get_current_batch(network net) return batch_num; } +void reset_momentum(network net) +{ + if (net.momentum == 0) return; + net.learning_rate = 0; + net.momentum = 0; + 
net.decay = 0; + #ifdef GPU + if(gpu_index >= 0) update_network_gpu(net); + #endif +} + float get_current_rate(network net) { int batch_num = get_current_batch(net); @@ -41,6 +52,7 @@ float get_current_rate(network net) for(i = 0; i < net.num_steps; ++i){ if(net.steps[i] > batch_num) return rate; rate *= net.scales[i]; + if(net.steps[i] > batch_num - 1) reset_momentum(net); } return rate; case EXP: diff --git a/src/network.h b/src/network.h index 0ad16ffe..428ff52c 100644 --- a/src/network.h +++ b/src/network.h @@ -51,6 +51,7 @@ float * get_network_delta_gpu_layer(network net, int i); float *get_network_output_gpu(network net); void forward_network_gpu(network net, network_state state); void backward_network_gpu(network net, network_state state); +void update_network_gpu(network net); #endif float get_current_rate(network net); diff --git a/src/yolo.c b/src/yolo.c index 80d85af5..86b132b0 100644 --- a/src/yolo.c +++ b/src/yolo.c @@ -11,40 +11,6 @@ char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; -void draw_yolo(image im, int num, float thresh, box *boxes, float **probs) -{ - int classes = 20; - int i; - - for(i = 0; i < num; ++i){ - int class = max_index(probs[i], classes); - float prob = probs[i][class]; - if(prob > thresh){ - int width = pow(prob, 1./2.)*10+1; - width = 8; - printf("%s: %.2f\n", voc_names[class], prob); - class = class * 7 % 20; - float red = get_color(0,class,classes); - float green = get_color(1,class,classes); - float blue = get_color(2,class,classes); - //red = green = blue = 0; - box b = boxes[i]; - - int left = (b.x-b.w/2.)*im.w; - int right = (b.x+b.w/2.)*im.w; - int top = (b.y-b.h/2.)*im.h; - int bot = (b.y+b.h/2.)*im.h; - - if(left < 0) left = 0; - if(right > im.w-1) right = im.w-1; - if(top < 0) top = 0; - if(bot > im.h-1) bot = im.h-1; - - draw_box_width(im, left, top, 
right, bot, width, red, green, blue); - } - } -} - void train_yolo(char *cfgfile, char *weightfile) { char *train_images = "data/voc.0712.trainval"; @@ -377,7 +343,7 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); convert_yolo_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0); if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms); - draw_yolo(im, l.side*l.side*l.n, thresh, boxes, probs); + draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, 0, 20); show_image(im, "predictions"); show_image(sized, "resized"); diff --git a/src/yolo_kernels.cu b/src/yolo_kernels.cu index 86cdc53e..78fedafb 100644 --- a/src/yolo_kernels.cu +++ b/src/yolo_kernels.cu @@ -20,6 +20,7 @@ extern "C" image ipl_to_image(IplImage* src); extern "C" void convert_yolo_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); -extern "C" void draw_yolo(image im, int num, float thresh, box *boxes, float **probs); +extern "C" char *voc_names[]; /* class-name table defined in yolo.c, passed to draw_detections */ + static float **probs; static box *boxes; static network net; @@ -57,7 +59,7 @@ void *detect_in_thread(void *ptr) printf("\033[1;1H"); printf("\nFPS:%.0f\n",fps); printf("Objects:\n\n"); - draw_yolo(det, l.side*l.side*l.n, thresh, boxes, probs); + draw_detections(det, l.side*l.side*l.n, thresh, boxes, probs, voc_names, 0, 20); return 0; }