From 6e13527f06480505556455dd9aad34d627a74501 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 1 Jun 2019 01:27:18 +0300 Subject: [PATCH] Added GIoU --- .../x64/cfg/yolov3-voc.yolov3-giou-40.cfg | 808 ++++++++++++++++++ build/darknet/x64/cfg/yolov3.coco-giou-12.cfg | 806 +++++++++++++++++ cfg/yolov3-voc.yolov3-giou-40.cfg | 808 ++++++++++++++++++ cfg/yolov3.coco-giou-12.cfg | 806 +++++++++++++++++ include/darknet.h | 27 + src/box.c | 132 ++- src/box.h | 3 + src/option_list.c | 7 + src/option_list.h | 1 + src/parser.c | 10 +- src/utils.c | 13 + src/utils.h | 1 + src/yolo_layer.c | 160 +++- 13 files changed, 3551 insertions(+), 31 deletions(-) create mode 100644 build/darknet/x64/cfg/yolov3-voc.yolov3-giou-40.cfg create mode 100644 build/darknet/x64/cfg/yolov3.coco-giou-12.cfg create mode 100644 cfg/yolov3-voc.yolov3-giou-40.cfg create mode 100644 cfg/yolov3.coco-giou-12.cfg diff --git a/build/darknet/x64/cfg/yolov3-voc.yolov3-giou-40.cfg b/build/darknet/x64/cfg/yolov3-voc.yolov3-giou-40.cfg new file mode 100644 index 00000000..b56f8a5d --- /dev/null +++ b/build/darknet/x64/cfg/yolov3-voc.yolov3-giou-40.cfg @@ -0,0 +1,808 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +## single gpu +learning_rate=0.001 +burn_in=1000 +max_batches = 100400 + +## 2x +#learning_rate=0.0005 +#burn_in=2000 +#max_batches = 100400 +#max_batches = 200800 + +## 4x +#learning_rate=0.00025 +#burn_in=4000 +#max_batches = 50200 +##max_batches = 200800 + +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 +iou_normalizer=0.25 +cls_normalizer=1.0 +iou_loss=giou + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 +iou_normalizer=0.25 +cls_normalizer=1.0 +iou_loss=giou + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 +iou_normalizer=0.25 +cls_normalizer=1.0 +iou_loss=giou + diff --git a/build/darknet/x64/cfg/yolov3.coco-giou-12.cfg b/build/darknet/x64/cfg/yolov3.coco-giou-12.cfg new file mode 100644 index 00000000..f3fd72db --- /dev/null +++ b/build/darknet/x64/cfg/yolov3.coco-giou-12.cfg @@ -0,0 +1,806 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +## single gpu +learning_rate=0.001 +burn_in=1000 +max_batches = 550400 + +## 2 gpu +#learning_rate=0.0005 +#burn_in=2000 +#max_batches = 500200 + +## 4 gpu +#learning_rate=0.00025 +#burn_in=4000 +#max_batches = 500200 +###max_batches = 2000800 + +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +iou_normalizer=0.5 +iou_loss=giou + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +iou_normalizer=0.5 +iou_loss=giou + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +iou_normalizer=0.5 +iou_loss=giou diff --git a/cfg/yolov3-voc.yolov3-giou-40.cfg b/cfg/yolov3-voc.yolov3-giou-40.cfg new file mode 100644 index 00000000..b56f8a5d --- /dev/null +++ b/cfg/yolov3-voc.yolov3-giou-40.cfg @@ -0,0 +1,808 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +## single gpu +learning_rate=0.001 +burn_in=1000 +max_batches = 100400 + +## 2x +#learning_rate=0.0005 +#burn_in=2000 +#max_batches = 100400 +#max_batches = 200800 + +## 4x +#learning_rate=0.00025 +#burn_in=4000 +#max_batches = 50200 +##max_batches = 200800 + +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 +iou_normalizer=0.25 +cls_normalizer=1.0 +iou_loss=giou + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 +iou_normalizer=0.25 +cls_normalizer=1.0 +iou_loss=giou + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 +iou_normalizer=0.25 +cls_normalizer=1.0 +iou_loss=giou + diff --git a/cfg/yolov3.coco-giou-12.cfg b/cfg/yolov3.coco-giou-12.cfg new file mode 100644 index 00000000..f3fd72db --- /dev/null +++ b/cfg/yolov3.coco-giou-12.cfg @@ -0,0 +1,806 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +## single gpu +learning_rate=0.001 +burn_in=1000 +max_batches = 550400 + +## 2 gpu +#learning_rate=0.0005 +#burn_in=2000 +#max_batches = 500200 + +## 4 gpu +#learning_rate=0.00025 +#burn_in=4000 +#max_batches = 500200 +###max_batches = 2000800 + +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +iou_normalizer=0.5 +iou_loss=giou + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +iou_normalizer=0.5 +iou_loss=giou + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +iou_normalizer=0.5 +iou_loss=giou diff --git a/include/darknet.h b/include/darknet.h index 0eb52b37..2af9f253 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -105,6 +105,11 @@ typedef enum { LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU }ACTIVATION; +// parser.h +typedef enum { + IOU, GIOU, MSE +} IOU_LOSS; + // image.h typedef enum{ PNG, BMP, TGA, JPG @@ -309,6 +314,10 @@ struct layer { float *weights; float *weight_updates; + float iou_normalizer; + float cls_normalizer; + int iou_loss; + char *align_bit_weights_gpu; float *mean_arr_gpu; float *align_workspace_gpu; @@ -671,6 +680,24 @@ typedef struct box { float x, y, w, h; } box; +// box.h +typedef struct boxabs { + float left, right, top, bot; +} boxabs; + +// box.h +typedef struct dxrep { + float dt, db, dl, dr; +} dxrep; + +// box.h +typedef struct ious { + float iou, giou; + dxrep dx_iou; + dxrep dx_giou; +} ious; + + // box.h typedef struct detection{ box bbox; diff --git a/src/box.c b/src/box.c index 28371362..640f54a2 100644 --- a/src/box.c +++ b/src/box.c @@ -64,6 +64,30 @@ dbox derivative(box a, box b) return d; } +// where c is the smallest box that fully encompases a and b +boxabs box_c(box a, box b) { + boxabs ba = { 0 }; + ba.top = fmin(a.y - a.h / 2, b.y - b.h / 2); + ba.bot = fmax(a.y + a.h / 2, b.y + b.h / 2); + ba.left = fmin(a.x - a.w / 2, b.x - b.w / 2); + ba.right = fmax(a.x + a.w / 2, b.x + b.w / 2); + return ba; +} + +// representation from x, y, w, h to top, left, bottom, right +boxabs to_tblr(box a) { + boxabs tblr = { 0 }; + float t = a.y - (a.h / 2); + float b = a.y + (a.h / 2); + float l = a.x - (a.w / 2); + float r = a.x + (a.w / 2); + tblr.top = t; + tblr.bot = b; + tblr.left = l; + tblr.right = r; + return tblr; +} + float overlap(float x1, float w1, float x2, float w2) { float l1 = x1 - w1/2; @@ -93,7 +117,113 @@ float box_union(box a, box b) float box_iou(box a, box b) { - return box_intersection(a, b)/box_union(a, b); + //return box_intersection(a, b)/box_union(a, b); + + float I = box_intersection(a, b); + float U = box_union(a, b); + if (I == 0 || U == 0) { + return 0; + } + return I / U; +} + +float box_giou(box a, box b) +{ + boxabs ba = box_c(a, b); + float w = ba.right - ba.left; + float h = ba.bot - ba.top; + float c = w*h; + float iou = box_iou(a, b); + if (c == 0) { + return iou; + } + float u = box_union(a, b); + float giou_term = (c - u) / c; +#ifdef DEBUG_PRINTS + printf(" c: %f, u: %f, giou_term: %f\n", c, u, giou_term); +#endif + return iou - giou_term; +} + +dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { + boxabs pred_tblr = to_tblr(pred); + float pred_t = fmin(pred_tblr.top, pred_tblr.bot); + float pred_b = fmax(pred_tblr.top, pred_tblr.bot); + float pred_l = fmin(pred_tblr.left, pred_tblr.right); + float pred_r = fmax(pred_tblr.left, pred_tblr.right); + + boxabs truth_tblr = to_tblr(truth); +#ifdef DEBUG_PRINTS + printf("\niou: %f, giou: %f\n", box_iou(pred, truth), box_giou(pred, truth)); + printf("pred: x,y,w,h: (%f, %f, %f, %f) -> t,b,l,r: (%f, %f, %f, %f)\n", pred.x, pred.y, pred.w, pred.h, pred_tblr.top, pred_tblr.bot, pred_tblr.left, pred_tblr.right); + printf("truth: x,y,w,h: (%f, %f, %f, %f) -> t,b,l,r: (%f, %f, %f, %f)\n", truth.x, truth.y, truth.w, truth.h, truth_tblr.top, truth_tblr.bot, truth_tblr.left, truth_tblr.right); +#endif + //printf("pred (t,b,l,r): (%f, %f, %f, %f)\n", pred_t, pred_b, pred_l, pred_r); + //printf("trut (t,b,l,r): (%f, %f, %f, %f)\n", truth_tblr.top, truth_tblr.bot, truth_tblr.left, truth_tblr.right); + dxrep dx = { 0 }; + float X = (pred_b - pred_t) * (pred_r - pred_l); + float Xhat = (truth_tblr.bot - truth_tblr.top) * (truth_tblr.right - truth_tblr.left); + float Ih = fmin(pred_b, truth_tblr.bot) - fmax(pred_t, truth_tblr.top); + float Iw = fmin(pred_r, truth_tblr.right) - fmax(pred_l, truth_tblr.left); + float I = Iw * Ih; + float U = X + Xhat - I; + + float Cw = fmax(pred_r, truth_tblr.right) - fmin(pred_l, truth_tblr.left); + float Ch = fmax(pred_b, truth_tblr.bot) - fmin(pred_t, truth_tblr.top); + float C = Cw * Ch; + + // float IoU = I / U; + // Partial Derivatives, derivatives + float dX_wrt_t = -1 * (pred_r - pred_l); + float dX_wrt_b = pred_r - pred_l; + float dX_wrt_l = -1 * (pred_b - pred_t); + float dX_wrt_r = pred_b - pred_t; + + // gradient of I min/max in IoU calc (prediction) + float dI_wrt_t = pred_t > truth_tblr.top ? (-1 * Iw) : 0; + float dI_wrt_b = pred_b < truth_tblr.bot ? Iw : 0; + float dI_wrt_l = pred_l > truth_tblr.left ? (-1 * Ih) : 0; + float dI_wrt_r = pred_r < truth_tblr.right ? Ih : 0; + // derivative of U with regard to x + float dU_wrt_t = dX_wrt_t - dI_wrt_t; + float dU_wrt_b = dX_wrt_b - dI_wrt_b; + float dU_wrt_l = dX_wrt_l - dI_wrt_l; + float dU_wrt_r = dX_wrt_r - dI_wrt_r; + // gradient of C min/max in IoU calc (prediction) + float dC_wrt_t = pred_t < truth_tblr.top ? (-1 * Cw) : 0; + float dC_wrt_b = pred_b > truth_tblr.bot ? Cw : 0; + float dC_wrt_l = pred_l < truth_tblr.left ? (-1 * Ch) : 0; + float dC_wrt_r = pred_r > truth_tblr.right ? Ch : 0; + + // Final IOU loss (prediction) (negative of IOU gradient, we want the negative loss) + float p_dt = 0; + float p_db = 0; + float p_dl = 0; + float p_dr = 0; + if (U > 0) { + p_dt = ((U * dI_wrt_t) - (I * dU_wrt_t)) / (U * U); + p_db = ((U * dI_wrt_b) - (I * dU_wrt_b)) / (U * U); + p_dl = ((U * dI_wrt_l) - (I * dU_wrt_l)) / (U * U); + p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U); + } + + if (iou_loss == GIOU) { + if (C > 0) { + // apply "C" term from gIOU + p_dt += ((C * dU_wrt_t) - (U * dC_wrt_t)) / (C * C); + p_db += ((C * dU_wrt_b) - (U * dC_wrt_b)) / (C * C); + p_dl += ((C * dU_wrt_l) - (U * dC_wrt_l)) / (C * C); + p_dr += ((C * dU_wrt_r) - (U * dC_wrt_r)) / (C * C); + } + } + + // apply grad from prediction min/max for correct corner selection + dx.dt = pred_tblr.top < pred_tblr.bot ? p_dt : p_db; + dx.db = pred_tblr.top < pred_tblr.bot ? p_db : p_dt; + dx.dl = pred_tblr.left < pred_tblr.right ? p_dl : p_dr; + dx.dr = pred_tblr.left < pred_tblr.right ? p_dr : p_dl; + + return dx; } float box_rmse(box a, box b) diff --git a/src/box.h b/src/box.h index 9d6aa4f3..2392fedd 100644 --- a/src/box.h +++ b/src/box.h @@ -33,7 +33,10 @@ extern "C" { box float_to_box(float *f); float box_iou(box a, box b); float box_rmse(box a, box b); +dxrep dx_box_iou(box a, box b, IOU_LOSS iou_loss); +float box_giou(box a, box b); dbox diou(box a, box b); +boxabs to_tblr(box a); void do_nms(box *boxes, float **probs, int total, int classes, float thresh); void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh); //LIB_API void do_nms_sort(detection *dets, int total, int classes, float thresh); diff --git a/src/option_list.c b/src/option_list.c index 22bc8c1b..e4fa7fe4 100644 --- a/src/option_list.c +++ b/src/option_list.c @@ -112,6 +112,13 @@ char *option_find_str(list *l, char *key, char *def) return def; } +char *option_find_str_quiet(list *l, char *key, char *def) +{ + char *v = option_find(l, key); + if (v) return v; + return def; +} + int option_find_int(list *l, char *key, int def) { char *v = option_find(l, key); diff --git a/src/option_list.h b/src/option_list.h index 9efa274a..726b559a 100644 --- a/src/option_list.h +++ b/src/option_list.h @@ -18,6 +18,7 @@ int read_option(char *s, list *options); void option_insert(list *l, char *key, char *val); char *option_find(list *l, char *key); char *option_find_str(list *l, char *key, char *def); +char *option_find_str_quiet(list *l, char *key, char *def); int option_find_int(list *l, char *key, int def); int option_find_int_quiet(list *l, char *key, int def); float option_find_float(list *l, char *key, float def); diff --git a/src/parser.c b/src/parser.c index 56a0bcb7..2e444b9d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -333,7 +333,15 @@ layer parse_yolo(list *options, size_params params) } //assert(l.outputs == params.inputs); - //l.max_boxes = option_find_int_quiet(options, "max", 90); + l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75); + l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1); + char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse"); // "iou"); + + if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE; + else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU; + else l.iou_loss = IOU; + fprintf(stderr, "Yolo layer params: iou loss: %s, iou_normalizer: %f, cls_normalizer: %f\n", (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? "giou" : "iou")), l.iou_normalizer, l.cls_normalizer); + l.jitter = option_find_float(options, "jitter", .2); l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); diff --git a/src/utils.c b/src/utils.c index cdaf1e61..bee427ed 100644 --- a/src/utils.c +++ b/src/utils.c @@ -621,6 +621,19 @@ float mag_array(float *a, int n) return sqrt(sum); } +// indicies to skip is a bit array +float mag_array_skip(float *a, int n, int * indices_to_skip) +{ + int i; + float sum = 0; + for (i = 0; i < n; ++i) { + if (indices_to_skip[i] != 1) { + sum += a[i] * a[i]; + } + } + return sqrt(sum); +} + void scale_array(float *a, int n, float s) { int i; diff --git a/src/utils.h b/src/utils.h index 183bb641..fe4efe04 100644 --- a/src/utils.h +++ b/src/utils.h @@ -58,6 +58,7 @@ float mean_array(float *a, int n); void mean_arrays(float **a, int n, int els, float *avg); float variance_array(float *a, int n); float mag_array(float *a, int n); +float mag_array_skip(float *a, int n, int * indices_to_skip); float dist_array(float *a, float *b, int n, int sub); float **one_hot_encode(float *a, int n, int k); float sec(clock_t clocks); diff --git a/src/yolo_layer.c b/src/yolo_layer.c index ae48ef7a..18fdf040 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -127,7 +127,7 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; return b; } - +/* float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) { box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); @@ -144,7 +144,57 @@ float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i delta[index + 3*stride] = scale * (th - x[index + 3*stride]); return iou; } +*/ +ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss) +{ + ious all_ious = { 0 }; + // i - step in layer width + // j - step in layer height + // Returns a box in absolute coordinates + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + all_ious.iou = box_iou(pred, truth); + all_ious.giou = box_giou(pred, truth); + // avoid nan in dx_box_iou + if (pred.w == 0) { pred.w = 1.0; } + if (pred.h == 0) { pred.h = 1.0; } + if (iou_loss == MSE) // old loss + { + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2 * n]); + float th = log(truth.h*h / biases[2 * n + 1]); + + delta[index + 0 * stride] = scale * (tx - x[index + 0 * stride]); + delta[index + 1 * stride] = scale * (ty - x[index + 1 * stride]); + delta[index + 2 * stride] = scale * (tw - x[index + 2 * stride]); + delta[index + 3 * stride] = scale * (th - x[index + 3 * stride]); + } + else { + // https://github.com/generalized-iou/g-darknet + // https://arxiv.org/abs/1902.09630v2 + // https://giou.stanford.edu/ + all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); + + // jacobian^t (transpose) + delta[index + 0 * stride] = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); + delta[index + 1 * stride] = (all_ious.dx_iou.dt + all_ious.dx_iou.db); + delta[index + 2 * stride] = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); + delta[index + 3 * stride] = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); + + // predict exponential, apply gradient of e^delta_t ONLY for w,h + delta[index + 2 * stride] *= exp(x[index + 2 * stride]); + delta[index + 3 * stride] *= exp(x[index + 3 * stride]); + + // normalize iou weight + delta[index + 0 * stride] *= iou_normalizer; + delta[index + 1 * stride] *= iou_normalizer; + delta[index + 2 * stride] *= iou_normalizer; + delta[index + 3 * stride] *= iou_normalizer; + } + + return all_ious; +} void delta_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat, int focal_loss) { @@ -202,23 +252,27 @@ static box float_to_box_stride(float *f, int stride) void forward_yolo_layer(const layer l, network_state state) { - int i,j,b,t,n; - memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); + int i, j, b, t, n; + memcpy(l.output, state.input, l.outputs*l.batch * sizeof(float)); #ifndef GPU - for (b = 0; b < l.batch; ++b){ - for(n = 0; n < l.n; ++n){ + for (b = 0; b < l.batch; ++b) { + for (n = 0; n < l.n; ++n) { int index = entry_index(l, b, n*l.w*l.h, 0); - activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + activate_array(l.output + index, 2 * l.w*l.h, LOGISTIC); index = entry_index(l, b, n*l.w*l.h, 4); - activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); + activate_array(l.output + index, (1 + l.classes)*l.w*l.h, LOGISTIC); } } #endif memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); - if(!state.train) return; - float avg_iou = 0; + if (!state.train) return; + //float avg_iou = 0; + float tot_iou = 0; + float tot_giou = 0; + float tot_iou_loss = 0; + float tot_giou_loss = 0; float recall = 0; float recall75 = 0; float avg_cat = 0; @@ -235,7 +289,7 @@ void forward_yolo_layer(const layer l, network_state state) box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h); float best_iou = 0; int best_t = 0; - for(t = 0; t < l.max_boxes; ++t){ + for (t = 0; t < l.max_boxes; ++t) { box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; if (class_id >= l.classes) { @@ -244,7 +298,7 @@ void forward_yolo_layer(const layer l, network_state state) getchar(); continue; // if label contains class_id more than number of classes in the cfg-file } - if(!truth.x) break; // continue; + if (!truth.x) break; // continue; float iou = box_iou(pred, truth); if (iou > best_iou) { best_iou = iou; @@ -253,24 +307,24 @@ void forward_yolo_layer(const layer l, network_state state) } int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); avg_anyobj += l.output[obj_index]; - l.delta[obj_index] = 0 - l.output[obj_index]; + l.delta[obj_index] = l.cls_normalizer * (0 - l.output[obj_index]); if (best_iou > l.ignore_thresh) { l.delta[obj_index] = 0; } if (best_iou > l.truth_thresh) { - l.delta[obj_index] = 1 - l.output[obj_index]; + l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); int class_id = state.truth[best_t*(4 + 1) + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); } } } } - for(t = 0; t < l.max_boxes; ++t){ + for (t = 0; t < l.max_boxes; ++t) { box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); if (truth.x < 0 || truth.y < 0 || truth.x > 1 || truth.y > 1 || truth.w < 0 || truth.h < 0) { printf(" Wrong label: truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f \n", truth.x, truth.y, truth.w, truth.h); @@ -278,32 +332,40 @@ void forward_yolo_layer(const layer l, network_state state) int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; if (class_id >= l.classes) continue; // if label contains class_id more than number of classes in the cfg-file - if(!truth.x) break; // continue; + if (!truth.x) break; // continue; float best_iou = 0; int best_n = 0; i = (truth.x * l.w); j = (truth.y * l.h); box truth_shift = truth; truth_shift.x = truth_shift.y = 0; - for(n = 0; n < l.total; ++n){ - box pred = {0}; - pred.w = l.biases[2*n]/ state.net.w; - pred.h = l.biases[2*n+1]/ state.net.h; + for (n = 0; n < l.total; ++n) { + box pred = { 0 }; + pred.w = l.biases[2 * n] / state.net.w; + pred.h = l.biases[2 * n + 1] / state.net.h; float iou = box_iou(pred, truth_shift); - if (iou > best_iou){ + if (iou > best_iou) { best_iou = iou; best_n = n; } } int mask_n = int_index(l.mask, best_n, l.n); - if(mask_n >= 0){ + if (mask_n >= 0) { int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + //float iou = + ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); + + // range is 0 <= 1 + tot_iou += all_ious.iou; + tot_iou_loss += 1 - all_ious.iou; + // range is -1 <= giou <= 1 + tot_giou += all_ious.giou; + tot_giou_loss += 1 - all_ious.giou; int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); avg_obj += l.output[obj_index]; - l.delta[obj_index] = 1 - l.output[obj_index]; + l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; @@ -312,14 +374,54 @@ void forward_yolo_layer(const layer l, network_state state) ++count; ++class_count; - if(iou > .5) recall += 1; - if(iou > .75) recall75 += 1; - avg_iou += iou; + //if(iou > .5) recall += 1; + //if(iou > .75) recall75 += 1; + //avg_iou += iou; + if (all_ious.iou > .5) recall += 1; + if (all_ious.iou > .75) recall75 += 1; } } } - *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); - printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); + //*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + //printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou / count, avg_cat / class_count, avg_obj / count, avg_anyobj / (l.w*l.h*l.n*l.batch), recall / count, recall75 / count, count); + + float avg_iou_loss = 0; + // gIOU loss + MSE (objectness) loss + if (l.iou_loss == MSE) { + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + } + else { + // Always compute classification loss both for iou + cls loss and for logging with mse loss + // TODO: remove IOU loss fields before computing MSE on class + // probably split into two arrays + int stride = l.w*l.h; + float* no_iou_loss_delta = calloc(l.batch * l.outputs, sizeof(float)); + memcpy(no_iou_loss_delta, l.delta, l.batch * l.outputs * sizeof(float)); + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + no_iou_loss_delta[index + 0 * stride] = 0; + no_iou_loss_delta[index + 1 * stride] = 0; + no_iou_loss_delta[index + 2 * stride] = 0; + no_iou_loss_delta[index + 3 * stride] = 0; + } + } + } + } + float classification_loss = l.cls_normalizer * pow(mag_array(no_iou_loss_delta, l.outputs * l.batch), 2); + free(no_iou_loss_delta); + + if (l.iou_loss == GIOU) { + avg_iou_loss = count > 0 ? l.iou_normalizer * (tot_giou_loss / count) : 0; + } + else { + avg_iou_loss = count > 0 ? l.iou_normalizer * (tot_iou_loss / count) : 0; + } + *(l.cost) = avg_iou_loss + classification_loss; + } + printf("v3 (%s loss, Normalizer: (iou: %f, cls: %f) Region %d Avg (IOU: %f, GIOU: %f), Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? "giou" : "iou")), l.iou_normalizer, l.cls_normalizer, state.index, tot_iou / count, tot_giou / count, avg_cat / class_count, avg_obj / count, avg_anyobj / (l.w*l.h*l.n*l.batch), recall / count, recall75 / count, count); } void backward_yolo_layer(const layer l, network_state state)