:charizard: fixed non-square aspect ratio thing :charizard:

This commit is contained in:
Joseph Redmon 2017-04-17 14:53:48 -07:00
parent addcc4ef96
commit 6731a3552d
9 changed files with 82 additions and 47 deletions

View File

@ -1,10 +1,10 @@
[net] [net]
# Testing # Testing
# batch=1 batch=1
# subdivisions=1 subdivisions=1
# Training # Training
batch=64 # batch=64
subdivisions=8 # subdivisions=8
batch=1 batch=1
subdivisions=1 subdivisions=1
height=544 height=544

View File

@ -380,5 +380,5 @@ void run_coco(int argc, char **argv)
else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, .5); else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, .5, 0,0,0,0);
} }

View File

@ -68,7 +68,7 @@ void *detect_in_thread(void *ptr)
if(l.type == DETECTION){ if(l.type == DETECTION){
get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
} else if (l.type == REGION){ } else if (l.type == REGION){
get_region_boxes(l, in.w, in.h, demo_thresh, probs, boxes, 0, 0, demo_hier_thresh, 1); get_region_boxes(l, in.w, in.h, net.w, net.h, demo_thresh, probs, boxes, 0, 0, demo_hier_thresh, 1);
} else { } else {
error("Last layer must produce detections\n"); error("Last layer must produce detections\n");
} }
@ -96,7 +96,7 @@ double get_wall_time()
return (double)time.tv_sec + (double)time.tv_usec * .000001; return (double)time.tv_sec + (double)time.tv_usec * .000001;
} }
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh) void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh, int w, int h, int fps, int fullscreen)
{ {
//skip = frame_skip; //skip = frame_skip;
image **alphabet = load_alphabet(); image **alphabet = load_alphabet();
@ -120,10 +120,15 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
cap = cvCaptureFromFile(filename); cap = cvCaptureFromFile(filename);
}else{ }else{
cap = cvCaptureFromCAM(cam_index); cap = cvCaptureFromCAM(cam_index);
if(DEMO){
cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, 1920); if(w){
cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, 1080); cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w);
cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, 60); }
if(h){
cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
}
if(fps){
cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, fps);
} }
} }
@ -164,9 +169,13 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
int count = 0; int count = 0;
if(!prefix){ if(!prefix){
cvNamedWindow("Demo", CV_WINDOW_NORMAL); cvNamedWindow("Demo", CV_WINDOW_NORMAL);
if(fullscreen){
cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
} else {
cvMoveWindow("Demo", 0, 0); cvMoveWindow("Demo", 0, 0);
cvResizeWindow("Demo", 1352, 1013); cvResizeWindow("Demo", 1352, 1013);
} }
}
double before = get_wall_time(); double before = get_wall_time();
@ -184,6 +193,12 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
else if(frame_skip == 4) frame_skip = 0; else if(frame_skip == 4) frame_skip = 0;
else if(frame_skip == 60) frame_skip = 4; else if(frame_skip == 60) frame_skip = 4;
else frame_skip = 0; else frame_skip = 0;
} else if (c == 27) {
return;
} else if (c == 63232) {
demo_thresh += .01;
} else if (c == 63233) {
demo_thresh -= .01;
} }
}else{ }else{
char buff[256]; char buff[256];
@ -224,7 +239,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
} }
} }
#else #else
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh) void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh, int w, int h, int fps, int fullscreen)
{ {
fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
} }

View File

@ -2,6 +2,6 @@
#define DEMO_H #define DEMO_H
#include "image.h" #include "image.h"
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh); void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh, int w, int h, int fps, int fullscreen);
#endif #endif

View File

@ -346,7 +346,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
network_predict(net, input.data); network_predict(net, input.data);
int w = val[t].w; int w = val[t].w;
int h = val[t].h; int h = val[t].h;
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5, 0); get_region_boxes(l, w, h, net.w, net.h, thresh, probs, boxes, 0, map, .5, 0);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms); if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
if (coco){ if (coco){
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h); print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@ -477,7 +477,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
network_predict(net, X); network_predict(net, X);
int w = val[t].w; int w = val[t].w;
int h = val[t].h; int h = val[t].h;
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5, 0); get_region_boxes(l, w, h, net.w, net.h, thresh, probs, boxes, 0, map, .5, 0);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms); if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
if (coco){ if (coco){
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h); print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@ -541,7 +541,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
image sized = resize_image(orig, net.w, net.h); image sized = resize_image(orig, net.w, net.h);
char *id = basecfg(path); char *id = basecfg(path);
network_predict(net, sized.data); network_predict(net, sized.data);
get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0, .5, 0); get_region_boxes(l, sized.w, sized.h, net.w, net.h, thresh, probs, boxes, 1, 0, .5, 1);
if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms); if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);
char labelpath[4096]; char labelpath[4096];
@ -624,16 +624,16 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
time=clock(); time=clock();
network_predict(net, X); network_predict(net, X);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh, 0); get_region_boxes(l, im.w, im.h, net.w, net.h, thresh, probs, boxes, 0, 0, hier_thresh, 1);
if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
//else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); //else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
draw_detections(sized, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes); draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
if(outfile){ if(outfile){
save_image(sized, outfile); save_image(im, outfile);
} }
else{ else{
save_image(sized, "predictions"); save_image(im, "predictions");
show_image(sized, "predictions"); show_image(im, "predictions");
#ifdef OPENCV #ifdef OPENCV
cvWaitKey(0); cvWaitKey(0);
cvDestroyAllWindows(); cvDestroyAllWindows();
@ -684,6 +684,10 @@ void run_detector(int argc, char **argv)
} }
int clear = find_arg(argc, argv, "-clear"); int clear = find_arg(argc, argv, "-clear");
int fullscreen = find_arg(argc, argv, "-fullscreen");
int height = find_int_arg(argc, argv, "-h", 0);
int width = find_int_arg(argc, argv, "-w", 0);
int fps = find_int_arg(argc, argv, "-fps", 0);
char *datacfg = argv[3]; char *datacfg = argv[3];
char *cfg = argv[4]; char *cfg = argv[4];
@ -699,6 +703,6 @@ void run_detector(int argc, char **argv)
int classes = option_find_int(options, "classes", 20); int classes = option_find_int(options, "classes", 20);
char *name_list = option_find_str(options, "names", "data/names.list"); char *name_list = option_find_str(options, "names", "data/names.list");
char **names = get_labels(name_list); char **names = get_labels(name_list);
demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh); demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh, width, height, fps, fullscreen);
} }
} }

View File

@ -548,7 +548,7 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
printf("%f\n", genaloss); printf("%f\n", genaloss);
scal_ongpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); scal_ongpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
scal_ongpu(imlayer.outputs*imlayer.batch, .00, gnet.layers[gnet.n-1].delta_gpu, 1); scal_ongpu(imlayer.outputs*imlayer.batch, .1, gnet.layers[gnet.n-1].delta_gpu, 1);
printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch));
printf("features %f\n", cuda_mag_array(gnet.layers[gnet.n-1].delta_gpu, imlayer.outputs*imlayer.batch)); printf("features %f\n", cuda_mag_array(gnet.layers[gnet.n-1].delta_gpu, imlayer.outputs*imlayer.batch));

View File

@ -315,7 +315,35 @@ void backward_region_layer(const layer l, network net)
*/ */
} }
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh, int nomult) void correct_region_boxes(box *boxes, int n, int w, int h, int netw, int neth, int relative)
{
// Rewrites boxes[0..n-1] in place, undoing an aspect-ratio-preserving fit of a
// w x h image inside a netw x neth area, so the boxes refer to the w x h image.
// NOTE(review): the arithmetic treats the incoming x/y/w/h as fractions of the
// netw x neth area -- presumably that is what the caller supplies; confirm.
// NOTE(review): w and h are used as divisors below with no zero check.
int i;
// Dimensions of the w x h image once scaled to fit inside netw x neth.
int new_w=0;
int new_h=0;
// The axis with the smaller scale factor is the one that fills the area;
// the other axis is scaled by the same factor (integer division, truncated).
if (((float)netw/w) < ((float)neth/h)) {
new_w = netw;
new_h = (h * netw)/w;
} else {
new_h = neth;
new_w = (w * neth)/h;
}
for (i = 0; i < n; ++i){
box b = boxes[i];
// Subtract half of the unused margin (as a fraction of the full area), then
// rescale so the centre is a fraction of the scaled image rather than the area.
b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw);
b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth);
// Sizes only need the rescale, not the margin offset.
b.w *= (float)netw/new_w;
b.h *= (float)neth/new_h;
// When `relative` is zero, scale the fractional values by the image size w x h.
if(!relative){
b.x *= w;
b.w *= w;
b.y *= h;
b.h *= h;
}
boxes[i] = b;
}
}
void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh, int relative)
{ {
int i,j,n,z; int i,j,n,z;
float *predictions = l.output; float *predictions = l.output;
@ -354,19 +382,6 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
float scale = predictions[obj_index]; float scale = predictions[obj_index];
boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h);
if(1){
int max = w > h ? w : h;
boxes[index].x = (boxes[index].x - (max - w)/2./max) / ((float)w/max);
boxes[index].y = (boxes[index].y - (max - h)/2./max) / ((float)h/max);
boxes[index].w *= (float)max/w;
boxes[index].h *= (float)max/h;
}
if(!nomult){
boxes[index].x *= w;
boxes[index].y *= h;
boxes[index].w *= w;
boxes[index].h *= h;
}
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5); int class_index = entry_index(l, 0, n*l.w*l.h + i, 5);
if(l.softmax_tree){ if(l.softmax_tree){
@ -408,6 +423,7 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
} }
} }
} }
correct_region_boxes(boxes, l.w*l.h*l.n, w, h, netw, neth, relative);
} }
#ifdef GPU #ifdef GPU

View File

@ -7,7 +7,7 @@
layer make_region_layer(int batch, int h, int w, int n, int classes, int coords); layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
void forward_region_layer(const layer l, network net); void forward_region_layer(const layer l, network net);
void backward_region_layer(const layer l, network net); void backward_region_layer(const layer l, network net);
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh, int nomult); void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh, int relative);
void resize_region_layer(layer *l, int w, int h); void resize_region_layer(layer *l, int w, int h);
void zero_objectness(layer l); void zero_objectness(layer l);

View File

@ -347,5 +347,5 @@ void run_yolo(int argc, char **argv)
else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, .5); else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, .5, 0,0,0,0);
} }