diff --git a/include/darknet.h b/include/darknet.h
index 0acac8a6..2bd70d1c 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -280,6 +280,7 @@ struct layer {
     int random;
     float ignore_thresh;
     float truth_thresh;
+    float iou_thresh;
     float thresh;
     float focus;
     int classfix;
diff --git a/src/parser.c b/src/parser.c
index b05a293f..472b179d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -380,6 +380,7 @@ layer parse_yolo(list *options, size_params params)
 
     l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
     l.truth_thresh = option_find_float(options, "truth_thresh", 1);
+    l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo]
     l.random = option_find_int_quiet(options, "random", 0);
 
     char *map_file = option_find_str(options, "map", 0);
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index 42e595c1..6c023106 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -193,6 +193,63 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
     return all_ious;
 }
 
+ious delta_yolo_box_accumulate(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss)
+{
+    ious all_ious = { 0 };
+    // i - step in layer width
+    // j - step in layer height
+    //  Returns a box in absolute coordinates
+    box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride);
+    all_ious.iou = box_iou(pred, truth);
+    all_ious.giou = box_giou(pred, truth);
+    // avoid nan in dx_box_iou
+    if (pred.w == 0) { pred.w = 1.0; }
+    if (pred.h == 0) { pred.h = 1.0; }
+    if (iou_loss == MSE)    // old loss
+    {
+        float tx = (truth.x*lw - i);
+        float ty = (truth.y*lh - j);
+        float tw = log(truth.w*w / biases[2 * n]);
+        float th = log(truth.h*h / biases[2 * n + 1]);
+
+        // accumulate delta
+        delta[index + 0 * stride] += scale * (tx - x[index + 0 * stride]);
+        delta[index + 1 * stride] += scale * (ty - x[index + 1 * stride]);
+        delta[index + 2 * stride] += scale * (tw - x[index + 2 * stride]);
+        delta[index + 3 * stride] += scale * (th - x[index + 3 * stride]);
+    }
+    else {
+        // https://github.com/generalized-iou/g-darknet
+        // https://arxiv.org/abs/1902.09630v2
+        // https://giou.stanford.edu/
+        all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss);
+
+        // jacobian^t (transpose)
+        float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr);
+        float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db);
+        float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr));
+        float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db));
+
+        // predict exponential, apply gradient of e^delta_t ONLY for w,h
+        dw *= exp(x[index + 2 * stride]);
+        dh *= exp(x[index + 3 * stride]);
+
+        // normalize iou weight
+        dx *= iou_normalizer;
+        dy *= iou_normalizer;
+        dw *= iou_normalizer;
+        dh *= iou_normalizer;
+
+        // accumulate delta
+        delta[index + 0 * stride] += dx;
+        delta[index + 1 * stride] += dy;
+        delta[index + 2 * stride] += dw;
+        delta[index + 3 * stride] += dh;
+    }
+
+    return all_ious;
+}
+
 void delta_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat, int focal_loss)
 {
     int n;
@@ -254,6 +311,7 @@ void forward_yolo_layer(const layer l, network_state state)
     }
 #endif
 
+    // delta is zeroed
     memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
     if (!state.train) return;
     //float avg_iou = 0;
@@ -319,7 +377,7 @@ void forward_yolo_layer(const layer l, network_state state)
                         int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1);
                         delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss);
                         box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
-                        delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss);
+                        delta_yolo_box_accumulate(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss);
                     }
                 }
             }
@@ -353,7 +411,7 @@ void forward_yolo_layer(const layer l, network_state state)
             int mask_n = int_index(l.mask, best_n, l.n);
             if (mask_n >= 0) {
                 int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
-                ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss);
+                ious all_ious = delta_yolo_box_accumulate(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss);
 
                 // range is 0 <= 1
                 tot_iou += all_ious.iou;
@@ -376,6 +434,44 @@ void forward_yolo_layer(const layer l, network_state state)
                 if (all_ious.iou > .5) recall += 1;
                 if (all_ious.iou > .75) recall75 += 1;
             }
+
+            // iou_thresh
+            for (n = 0; n < l.total; ++n) {
+                int mask_n = int_index(l.mask, n, l.n);
+                if (mask_n >= 0 && n != best_n) {
+                    box pred = { 0 };
+                    pred.w = l.biases[2 * n] / state.net.w;
+                    pred.h = l.biases[2 * n + 1] / state.net.h;
+                    float iou = box_iou(pred, truth_shift);
+                    // iou, n
+
+                    if (iou > l.iou_thresh) {
+                        int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
+                        ious all_ious = delta_yolo_box_accumulate(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss);
+
+                        // range is 0 <= 1
+                        tot_iou += all_ious.iou;
+                        tot_iou_loss += 1 - all_ious.iou;
+                        // range is -1 <= giou <= 1
+                        tot_giou += all_ious.giou;
+                        tot_giou_loss += 1 - all_ious.giou;
+
+                        int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4);
+                        avg_obj += l.output[obj_index];
+                        l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]);
+
+                        int class_id = state.truth[t*(4 + 1) + b*l.truths + 4];
+                        if (l.map) class_id = l.map[class_id];
+                        int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1);
+                        delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.focal_loss);
+
+                        ++count;
+                        ++class_count;
+                        if (all_ious.iou > .5) recall += 1;
+                        if (all_ious.iou > .75) recall75 += 1;
+                    }
+                }
+            }
         }
     }
     //*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);