diff --git a/cfg/tiny-yolo.cfg b/cfg/tiny-yolo.cfg
index 5580098b..9a4a184f 100644
--- a/cfg/tiny-yolo.cfg
+++ b/cfg/tiny-yolo.cfg
@@ -1,6 +1,10 @@
 [net]
-batch=64
-subdivisions=8
+# Training
+# batch=64
+# subdivisions=2
+# Testing
+batch=1
+subdivisions=1
 width=416
 height=416
 channels=3
@@ -12,10 +16,11 @@ exposure = 1.5
 hue=.1
 
 learning_rate=0.001
-max_batches = 120000
+burn_in=1000
+max_batches = 500200
 policy=steps
-steps=-1,100,80000,100000
-scales=.1,10,.1,.1
+steps=400000,450000
+scales=.1,.1
 
 [convolutional]
 batch_normalize=1
@@ -104,7 +109,7 @@ batch_normalize=1
 size=3
 stride=1
 pad=1
-filters=1024
+filters=512
 activation=leaky
 
 [convolutional]
@@ -115,14 +120,14 @@ filters=425
 activation=linear
 
 [region]
-anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741
+anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
 bias_match=1
 classes=80
 coords=4
 num=5
 softmax=1
 jitter=.2
-rescore=1
+rescore=0
 
 object_scale=5
 noobject_scale=1
diff --git a/include/darknet.h b/include/darknet.h
index 4f6bd6d6..cdf3cc66 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -699,6 +699,10 @@ float *network_predict_p(network *net, float *input);
 int network_width(network *net);
 int network_height(network *net);
 float *network_predict_image(network *net, image im);
+void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, box *boxes, float **probs);
+int num_boxes(network *net);
+box *make_boxes(network *net);
+float **make_probs(network *net);
 
 void reset_network_state(network net, int b);
 void reset_network_state(network net, int b);
diff --git a/python/darknet.py b/python/darknet.py
index 7dff2f06..b2750aea 100644
--- a/python/darknet.py
+++ b/python/darknet.py
@@ -15,6 +15,12 @@ def sample(probs):
 def c_array(ctype, values):
     return (ctype * len(values))(*values)
 
+class BOX(Structure):
+    _fields_ = [("x", c_float),
+                ("y", c_float),
+                ("w", c_float),
+                ("h", c_float)]
+
 class IMAGE(Structure):
     _fields_ = [("w", c_int),
                 ("h", c_int),
@@ -36,6 +42,24 @@ predict = lib.network_predict_p
 predict.argtypes = [c_void_p, POINTER(c_float)]
 predict.restype = POINTER(c_float)
 
+make_boxes = lib.make_boxes
+make_boxes.argtypes = [c_void_p]
+make_boxes.restype = POINTER(BOX)
+
+free_ptrs = lib.free_ptrs
+free_ptrs.argtypes = [POINTER(c_void_p), c_int]
+
+num_boxes = lib.num_boxes
+num_boxes.argtypes = [c_void_p]
+num_boxes.restype = c_int
+
+make_probs = lib.make_probs
+make_probs.argtypes = [c_void_p]
+make_probs.restype = POINTER(POINTER(c_float))
+
+free_boxes = lib.free  # libc free() via the darknet library handle; releases make_boxes() buffers
+free_boxes.argtypes = [c_void_p]
+
 reset_rnn = lib.reset_rnn
 reset_rnn.argtypes = [c_void_p]
 
@@ -43,6 +67,9 @@ load_net = lib.load_network_p
 load_net.argtypes = [c_char_p, c_char_p, c_int]
 load_net.restype = c_void_p
 
+free_image = lib.free_image
+free_image.argtypes = [IMAGE]
+
 letterbox_image = lib.letterbox_image
 letterbox_image.argtypes = [IMAGE, c_int, c_int]
 letterbox_image.restype = IMAGE
@@ -59,6 +86,9 @@ predict_image = lib.network_predict_image
 predict_image.argtypes = [c_void_p, IMAGE]
 predict_image.restype = POINTER(c_float)
 
+network_detect = lib.network_detect
+network_detect.argtypes = [c_void_p, IMAGE, c_float, c_float, c_float, POINTER(BOX), POINTER(POINTER(c_float))]
+
 def classify(net, meta, im):
     out = predict_image(net, im)
     res = []
@@ -67,20 +97,37 @@ def classify(net, meta, im):
     res = sorted(res, key=lambda x: -x[1])
     return res
 
-def detect(net, meta, im):
-    out = predict_image(net, im)
+def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
+    """Run the detector on the image file at path `image`.
+
+    Returns (name, probability, (x, y, w, h)) tuples for every box/class
+    pair with non-zero probability, sorted by descending probability.
+    """
+    im = load_image(image, 0, 0)
+    boxes = make_boxes(net)
+    probs = make_probs(net)
+    num = num_boxes(net)
+    network_detect(net, im, thresh, hier_thresh, nms, boxes, probs)
     res = []
-    for i in range(meta.classes):
-        res.append((meta.names[i], out[i]))
+    for j in range(num):
+        for i in range(meta.classes):
+            if probs[j][i] > 0:
+                res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h)))
     res = sorted(res, key=lambda x: -x[1])
+    free_image(im)
+    free_boxes(cast(boxes, c_void_p))
+    free_ptrs(cast(probs, POINTER(c_void_p)), num)
     return res
-
 
 if __name__ == "__main__":
-    net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0)
-    im = load_image("data/wolf.jpg", 0, 0)
-    meta = load_meta("cfg/imagenet1k.data")
-    r = classify(net, meta, im)
-    print r[:10]
+    #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0)
+    #im = load_image("data/wolf.jpg", 0, 0)
+    #meta = load_meta("cfg/imagenet1k.data")
+    #r = classify(net, meta, im)
+    #print r[:10]
+    net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.backup", 0)
+    meta = load_meta("cfg/coco.data")
+    r = detect(net, meta, "data/dog.jpg")
+    print r
 
 
diff --git a/src/network.c b/src/network.c
index 38d5229e..89e18238 100644
--- a/src/network.c
+++ b/src/network.c
@@ -494,6 +494,42 @@ float *network_predict(network net, float *input)
     return net.output;
 }
 
+/* Number of candidate boxes produced by the network's last layer. */
+int num_boxes(network *net)
+{
+    layer l = net->layers[net->n-1];
+    return l.w*l.h*l.n;
+}
+
+/* Allocate a zeroed box array, one entry per candidate box; caller frees. */
+box *make_boxes(network *net)
+{
+    layer l = net->layers[net->n-1];
+    box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
+    return boxes;
+}
+
+/* Allocate a zeroed score row of classes + 1 floats for each candidate box. */
+float **make_probs(network *net)
+{
+    int j;
+    layer l = net->layers[net->n-1];
+    float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
+    for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float));
+    return probs;
+}
+
+/* Run the net on im and, for a REGION output layer, fill boxes/probs (NMS applied when nms != 0). */
+void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, box *boxes, float **probs)
+{
+    network_predict_image(net, im);
+    layer l = net->layers[net->n-1];
+    if(l.type == REGION){
+        get_region_boxes(l, im.w, im.h, net->w, net->h, thresh, probs, boxes, 0, 0, 0, hier_thresh, 0);
+        if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
+    }
+}
+
 float *network_predict_p(network *net, float *input)
 {
     return network_predict(*net, input);