Added Mixup data augmentation if is used mixup=1

2023-08-10 21:13:14 +03:00 · 2019-06-06 02:55:11 +03:00
parent 2347913ef1
commit 57baf1a487
7 changed files with 251 additions and 141 deletions
--- a/include/darknet.h
+++ b/include/darknet.h
@ -595,6 +595,7 @@ typedef struct network {
    int center;
    int flip; // horizontal flip 50% probability augmentaiont for classifier training (default = 1)
    int blur;
+    int mixup;
    float angle;
    float aspect;
    float exposure;
@ -759,6 +760,7 @@ typedef struct load_args {
    float jitter;
    int flip;
    int blur;
+    int mixup;
    float angle;
    float aspect;
    float saturation;
--- a/src/data.c
+++ b/src/data.c
@ -774,18 +774,52 @@ static box float_to_box_stride(float *f, int stride)
    return b;
 }

+void blend_truth(float *new_truth, int boxes, float *old_truth)
+{
+    const int t_size = 4 + 1;
+    int count_new_truth = 0;
+    int t;
+    for (t = 0; t < boxes; ++t) {
+        float x = new_truth[t*(4 + 1)];
+        if (!x) break;
+        count_new_truth++;
+
+    }
+    for (t = count_new_truth; t < boxes; ++t) {
+        float *new_truth_ptr = new_truth + t*t_size;
+        float *old_truth_ptr = old_truth + (t - count_new_truth)*t_size;
+        float x = old_truth_ptr[0];
+        if (!x) break;
+
+        new_truth_ptr[0] = old_truth_ptr[0];
+        new_truth_ptr[1] = old_truth_ptr[1];
+        new_truth_ptr[2] = old_truth_ptr[2];
+        new_truth_ptr[3] = old_truth_ptr[3];
+        new_truth_ptr[4] = old_truth_ptr[4];
+    }
+    //printf("\n was %d bboxes, now %d bboxes \n", count_new_truth, t);
+}
+
 #ifdef OPENCV

 #include "http_stream.h"

-data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, float jitter,
+data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, float jitter,
    float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int show_imgs)
 {
    const int random_index = random_gen();
    c = c ? c : 3;
    char **random_paths;
+    char **mixup_random_paths = NULL;
    if (track) random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed);
    else random_paths = get_random_paths(paths, n, m);
+
+    int mixup = use_mixup ? random_gen() % 2 : 0;
+    //printf("\n mixup = %d \n", mixup);
+    if (mixup) {
+        if (track) mixup_random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed);
+        else mixup_random_paths = get_random_paths(paths, n, m);
+    }
    int i;
    data d = {0};
    d.shallow = 0;
@ -799,96 +833,135 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
    int augmentation_calculated = 0;

    d.y = make_matrix(n, 5*boxes);
-    for(i = 0; i < n; ++i){
-        const char *filename = random_paths[i];
+    int i_mixup = 0;
+    for (i_mixup = 0; i_mixup <= mixup; i_mixup++) {
+        if (i_mixup) augmentation_calculated = 0;
+        for (i = 0; i < n; ++i) {
+            float *truth = (float*)calloc(5 * boxes, sizeof(float));
+            const char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i];

-        int flag = (c >= 3);
-        mat_cv *src;
-        src = load_image_mat_cv(filename, flag);
-        if (src == NULL) {
-            if (check_mistakes) getchar();
-            continue;
-        }
-
-        int oh = get_height_mat(src);
-        int ow = get_width_mat(src);
-
-        int dw = (ow*jitter);
-        int dh = (oh*jitter);
-
-        if(!augmentation_calculated || !track)
-        {
-            augmentation_calculated = 1;
-            r1 = random_float();
-            r2 = random_float();
-            r3 = random_float();
-            r4 = random_float();
-
-            dhue = rand_uniform_strong(-hue, hue);
-            dsat = rand_scale(saturation);
-            dexp = rand_scale(exposure);
-
-            flip = use_flip ? random_gen() % 2 : 0;
-            blur = rand_int(0, 1) ? (use_blur) : 0;
-        }
-
-        int pleft  = rand_precalc_random(-dw, dw, r1);
-        int pright = rand_precalc_random(-dw, dw, r2);
-        int ptop   = rand_precalc_random(-dh, dh, r3);
-        int pbot   = rand_precalc_random(-dh, dh, r4);
-
-        int swidth =  ow - pleft - pright;
-        int sheight = oh - ptop - pbot;
-
-        float sx = (float)swidth  / ow;
-        float sy = (float)sheight / oh;
-
-        float dx = ((float)pleft/ow)/sx;
-        float dy = ((float)ptop /oh)/sy;
-
-        fill_truth_detection(filename, boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, w, h);
-
-        image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp,
-            blur, boxes, d.y.vals[i]);
-
-        d.X.vals[i] = ai.data;
-
-        if(show_imgs)
-        {
-            char buff[1000];
-            sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg(random_paths[i]), random_gen());
-            int t;
-            for (t = 0; t < boxes; ++t) {
-                box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1);
-                if (!b.x) break;
-                int left = (b.x - b.w / 2.)*ai.w;
-                int right = (b.x + b.w / 2.)*ai.w;
-                int top = (b.y - b.h / 2.)*ai.h;
-                int bot = (b.y + b.h / 2.)*ai.h;
-                draw_box_width(ai, left, top, right, bot, 3, 150, 100, 50); // 3 channels RGB
+            int flag = (c >= 3);
+            mat_cv *src;
+            src = load_image_mat_cv(filename, flag);
+            if (src == NULL) {
+                if (check_mistakes) getchar();
+                continue;
            }

-            save_image(ai, buff);
-            if (show_imgs == 1) {
-                show_image(ai, buff);
-                wait_until_press_key_cv();
-            }
-            printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Click on window and press ESC button \n");
-        }
+            int oh = get_height_mat(src);
+            int ow = get_width_mat(src);

-        release_mat(&src);
+            int dw = (ow*jitter);
+            int dh = (oh*jitter);
+
+            if (!augmentation_calculated || !track)
+            {
+                augmentation_calculated = 1;
+                r1 = random_float();
+                r2 = random_float();
+                r3 = random_float();
+                r4 = random_float();
+
+                dhue = rand_uniform_strong(-hue, hue);
+                dsat = rand_scale(saturation);
+                dexp = rand_scale(exposure);
+
+                flip = use_flip ? random_gen() % 2 : 0;
+                blur = rand_int(0, 1) ? (use_blur) : 0;
+            }
+
+            int pleft = rand_precalc_random(-dw, dw, r1);
+            int pright = rand_precalc_random(-dw, dw, r2);
+            int ptop = rand_precalc_random(-dh, dh, r3);
+            int pbot = rand_precalc_random(-dh, dh, r4);
+
+            int swidth = ow - pleft - pright;
+            int sheight = oh - ptop - pbot;
+
+            float sx = (float)swidth / ow;
+            float sy = (float)sheight / oh;
+
+            float dx = ((float)pleft / ow) / sx;
+            float dy = ((float)ptop / oh) / sy;
+
+            fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. / sy, w, h);
+
+            image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp,
+                blur, boxes, d.y.vals[i]);
+
+            if (i_mixup) {
+                image old_img = ai;
+                old_img.data = d.X.vals[i];
+                //show_image(ai, "new");
+                //show_image(old_img, "old");
+                //wait_until_press_key_cv();
+                blend_images_cv(ai, 0.5, old_img, 0.5);
+                blend_truth(truth, boxes, d.y.vals[i]);
+            }
+
+            d.X.vals[i] = ai.data;
+            memcpy(d.y.vals[i], truth, 5*boxes * sizeof(float));
+
+            if (show_imgs)// && i_mixup)   // delete i_mixup
+            {
+                image tmp_ai = copy_image(ai);
+                char buff[1000];
+                sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg(filename), random_gen());
+                int t;
+                for (t = 0; t < boxes; ++t) {
+                    box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1);
+                    if (!b.x) break;
+                    int left = (b.x - b.w / 2.)*ai.w;
+                    int right = (b.x + b.w / 2.)*ai.w;
+                    int top = (b.y - b.h / 2.)*ai.h;
+                    int bot = (b.y + b.h / 2.)*ai.h;
+                    draw_box_width(tmp_ai, left, top, right, bot, 1, 150, 100, 50); // 3 channels RGB
+                }
+
+                save_image(tmp_ai, buff);
+                if (show_imgs == 1) {
+                    show_image(tmp_ai, buff);
+                    wait_until_press_key_cv();
+                }
+                printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Click on window and press ESC button \n");
+                free_image(tmp_ai);
+            }
+
+            release_mat(&src);
+            free(truth);
+        }
    }
    free(random_paths);
+    if(mixup_random_paths) free(mixup_random_paths);
    return d;
 }
 #else    // OPENCV
-data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, float jitter,
+void blend_images(image new_img, float alpha, image old_img, float beta)
+{
+    int i;
+    int data_size = new_img.w * new_img.h * new_img.c;
+    #pragma omp parallel for
+    for (i = 0; i < data_size; ++i)
+        new_img.data[i] = new_img.data[i] * alpha + old_img.data[i] * beta;
+}
+
+data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, float jitter,
    float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int show_imgs)
 {
+    const int random_index = random_gen();
    c = c ? c : 3;
    char **random_paths;
+    char **mixup_random_paths = NULL;
    if(track) random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed);
    else random_paths = get_random_paths(paths, n, m);
+
+    int mixup = use_mixup ? random_gen() % 2 : 0;
+    //printf("\n mixup = %d \n", mixup);
+    if (mixup) {
+        if (track) mixup_random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed);
+        else mixup_random_paths = get_random_paths(paths, n, m);
+    }
+
    int i;
    data d = { 0 };
    d.shallow = 0;
@ -902,82 +975,104 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
    int augmentation_calculated = 0;

    d.y = make_matrix(n, 5 * boxes);
-    for (i = 0; i < n; ++i) {
-        image orig = load_image(random_paths[i], 0, 0, c);
+    int i_mixup = 0;
+    for (i_mixup = 0; i_mixup <= mixup; i_mixup++) {
+        if (i_mixup) augmentation_calculated = 0;
+        for (i = 0; i < n; ++i) {
+            float *truth = (float*)calloc(5 * boxes, sizeof(float));
+            char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i];

-        int oh = orig.h;
-        int ow = orig.w;
+            image orig = load_image(filename, 0, 0, c);

-        int dw = (ow*jitter);
-        int dh = (oh*jitter);
+            int oh = orig.h;
+            int ow = orig.w;

-        if (!augmentation_calculated || !track)
-        {
-            augmentation_calculated = 1;
-            r1 = random_float();
-            r2 = random_float();
-            r3 = random_float();
-            r4 = random_float();
+            int dw = (ow*jitter);
+            int dh = (oh*jitter);

-            dhue = rand_uniform_strong(-hue, hue);
-            dsat = rand_scale(saturation);
-            dexp = rand_scale(exposure);
+            if (!augmentation_calculated || !track)
+            {
+                augmentation_calculated = 1;
+                r1 = random_float();
+                r2 = random_float();
+                r3 = random_float();
+                r4 = random_float();

-            flip = use_flip ? random_gen() % 2 : 0;
-        }
+                dhue = rand_uniform_strong(-hue, hue);
+                dsat = rand_scale(saturation);
+                dexp = rand_scale(exposure);

-        int pleft = rand_precalc_random(-dw, dw, r1);
-        int pright = rand_precalc_random(-dw, dw, r2);
-        int ptop = rand_precalc_random(-dh, dh, r3);
-        int pbot = rand_precalc_random(-dh, dh, r4);
-
-        int swidth = ow - pleft - pright;
-        int sheight = oh - ptop - pbot;
-
-        float sx = (float)swidth / ow;
-        float sy = (float)sheight / oh;
-
-        image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
-
-        float dx = ((float)pleft / ow) / sx;
-        float dy = ((float)ptop / oh) / sy;
-
-        image sized = resize_image(cropped, w, h);
-        if (flip) flip_image(sized);
-        distort_image(sized, dhue, dsat, dexp);
-        //random_distort_image(sized, hue, saturation, exposure);
-        d.X.vals[i] = sized.data;
-
-        fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, w, h);
-
-        if(show_imgs)
-        {
-            char buff[1000];
-            sprintf(buff, "aug_%s_%d", basecfg(random_paths[i]), random_gen());
-            int t;
-            for (t = 0; t < boxes; ++t) {
-                box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1);
-                if (!b.x) break;
-                int left = (b.x - b.w / 2.)*sized.w;
-                int right = (b.x + b.w / 2.)*sized.w;
-                int top = (b.y - b.h / 2.)*sized.h;
-                int bot = (b.y + b.h / 2.)*sized.h;
-                draw_box_width(sized, left, top, right, bot, 3, 150, 100, 50); // 3 channels RGB
+                flip = use_flip ? random_gen() % 2 : 0;
            }

-            show_image(sized, buff);
-            if (show_imgs == 1) {
+            int pleft = rand_precalc_random(-dw, dw, r1);
+            int pright = rand_precalc_random(-dw, dw, r2);
+            int ptop = rand_precalc_random(-dh, dh, r3);
+            int pbot = rand_precalc_random(-dh, dh, r4);
+
+            int swidth = ow - pleft - pright;
+            int sheight = oh - ptop - pbot;
+
+            float sx = (float)swidth / ow;
+            float sy = (float)sheight / oh;
+
+            image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
+
+            float dx = ((float)pleft / ow) / sx;
+            float dy = ((float)ptop / oh) / sy;
+
+            image sized = resize_image(cropped, w, h);
+            if (flip) flip_image(sized);
+            distort_image(sized, dhue, dsat, dexp);
+            //random_distort_image(sized, hue, saturation, exposure);
+
+            fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. / sy, w, h);
+
+            if (i_mixup) {
+                image old_img = sized;
+                old_img.data = d.X.vals[i];
+                //show_image(sized, "new");
+                //show_image(old_img, "old");
+                //wait_until_press_key_cv();
+                blend_images(sized, 0.5, old_img, 0.5);
+                blend_truth(truth, boxes, d.y.vals[i]);
+            }
+
+            d.X.vals[i] = sized.data;
+            memcpy(d.y.vals[i], truth, 5 * boxes * sizeof(float));
+
+            if (show_imgs)// && i_mixup)
+            {
+                char buff[1000];
+                sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg(filename), random_gen());
+
+                int t;
+                for (t = 0; t < boxes; ++t) {
+                    box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1);
+                    if (!b.x) break;
+                    int left = (b.x - b.w / 2.)*sized.w;
+                    int right = (b.x + b.w / 2.)*sized.w;
+                    int top = (b.y - b.h / 2.)*sized.h;
+                    int bot = (b.y + b.h / 2.)*sized.h;
+                    draw_box_width(sized, left, top, right, bot, 1, 150, 100, 50); // 3 channels RGB
+                }
+
                save_image(sized, buff);
-                wait_until_press_key_cv();
+                if (show_imgs == 1) {
+                    show_image(sized, buff);
+                    wait_until_press_key_cv();
+                }
+                printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Press Enter: \n");
+                //getchar();
            }
-            printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Press Enter: \n");
-            getchar();
-        }

-        free_image(orig);
-        free_image(cropped);
+            free_image(orig);
+            free_image(cropped);
+            free(truth);
+        }
    }
    free(random_paths);
+    if (mixup_random_paths) free(mixup_random_paths);
    return d;
 }
 #endif    // OPENCV
@ -1002,7 +1097,7 @@ void *load_thread(void *ptr)
    } else if (a.type == REGION_DATA){
        *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
    } else if (a.type == DETECTION_DATA){
-        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.blur, a.jitter,
+        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.blur, a.mixup, a.jitter,
            a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.show_imgs);
    } else if (a.type == SWAG_DATA){
        *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
--- a/src/data.h
+++ b/src/data.h
@ -86,7 +86,7 @@ void print_letters(float *pred, int n);
 data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
 data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
 data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
-data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, float jitter,
+data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, float jitter,
    float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int show_imgs);
 data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
 matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
--- a/src/detector.c
+++ b/src/detector.c
@ -129,6 +129,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i

    args.angle = net.angle;
    args.blur = net.blur;
+    args.mixup = net.mixup;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;
--- a/src/image_opencv.cpp
+++ b/src/image_opencv.cpp
@ -1238,6 +1238,14 @@ image image_data_augmentation(mat_cv* mat, int w, int h,
    return out;
 }

+// blend two images with (alpha and beta)
+void blend_images_cv(image new_img, float alpha, image old_img, float beta)
+{
+    cv::Mat new_mat(cv::Size(new_img.w, new_img.h), CV_32FC(new_img.c), new_img.data);// , size_t step = AUTO_STEP)
+    cv::Mat old_mat(cv::Size(old_img.w, old_img.h), CV_32FC(old_img.c), old_img.data);
+    cv::addWeighted(new_mat, alpha, old_mat, beta, 0.0, new_mat);
+}
+
 // ====================================================================
 // Show Anchors
 // ====================================================================
--- a/src/image_opencv.h
+++ b/src/image_opencv.h
@ -98,6 +98,9 @@ image image_data_augmentation(mat_cv* mat, int w, int h,
    float jitter, float dhue, float dsat, float dexp,
    int blur, int num_boxes, float *truth);

+// blend two images with (alpha and beta)
+void blend_images_cv(image new_img, float alpha, image old_img, float beta);
+
 // Show Anchors
 void show_acnhors(int number_of_boxes, int num_of_clusters, float *rel_width_height_array, model anchors_data, int width, int height);

--- a/src/parser.c
+++ b/src/parser.c
@ -708,6 +708,7 @@ void parse_net_options(list *options, network *net)
    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
    net->flip = option_find_int_quiet(options, "flip", 1);
    net->blur = option_find_int_quiet(options, "blur", 0);
+    net->mixup = option_find_int_quiet(options, "mixup", 0);

    net->angle = option_find_float_quiet(options, "angle", 0);
    net->aspect = option_find_float_quiet(options, "aspect", 1);