diff --git a/include/darknet.h b/include/darknet.h index 7fe2d0b2..1ab2f339 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -595,6 +595,7 @@ typedef struct network { int center; int flip; // horizontal flip 50% probability augmentaiont for classifier training (default = 1) int blur; + int mixup; float angle; float aspect; float exposure; @@ -759,6 +760,7 @@ typedef struct load_args { float jitter; int flip; int blur; + int mixup; float angle; float aspect; float saturation; diff --git a/src/data.c b/src/data.c index 53959b4c..005c59aa 100644 --- a/src/data.c +++ b/src/data.c @@ -774,18 +774,52 @@ static box float_to_box_stride(float *f, int stride) return b; } +void blend_truth(float *new_truth, int boxes, float *old_truth) +{ + const int t_size = 4 + 1; + int count_new_truth = 0; + int t; + for (t = 0; t < boxes; ++t) { + float x = new_truth[t*(4 + 1)]; + if (!x) break; + count_new_truth++; + + } + for (t = count_new_truth; t < boxes; ++t) { + float *new_truth_ptr = new_truth + t*t_size; + float *old_truth_ptr = old_truth + (t - count_new_truth)*t_size; + float x = old_truth_ptr[0]; + if (!x) break; + + new_truth_ptr[0] = old_truth_ptr[0]; + new_truth_ptr[1] = old_truth_ptr[1]; + new_truth_ptr[2] = old_truth_ptr[2]; + new_truth_ptr[3] = old_truth_ptr[3]; + new_truth_ptr[4] = old_truth_ptr[4]; + } + //printf("\n was %d bboxes, now %d bboxes \n", count_new_truth, t); +} + #ifdef OPENCV #include "http_stream.h" -data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, float jitter, +data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int show_imgs) { const int random_index = random_gen(); c = c ? c : 3; char **random_paths; + char **mixup_random_paths = NULL; if (track) random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed); else random_paths = get_random_paths(paths, n, m); + + int mixup = use_mixup ? random_gen() % 2 : 0; + //printf("\n mixup = %d \n", mixup); + if (mixup) { + if (track) mixup_random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed); + else mixup_random_paths = get_random_paths(paths, n, m); + } int i; data d = {0}; d.shallow = 0; @@ -799,96 +833,135 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo int augmentation_calculated = 0; d.y = make_matrix(n, 5*boxes); - for(i = 0; i < n; ++i){ - const char *filename = random_paths[i]; + int i_mixup = 0; + for (i_mixup = 0; i_mixup <= mixup; i_mixup++) { + if (i_mixup) augmentation_calculated = 0; + for (i = 0; i < n; ++i) { + float *truth = (float*)calloc(5 * boxes, sizeof(float)); + const char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i]; - int flag = (c >= 3); - mat_cv *src; - src = load_image_mat_cv(filename, flag); - if (src == NULL) { - if (check_mistakes) getchar(); - continue; - } - - int oh = get_height_mat(src); - int ow = get_width_mat(src); - - int dw = (ow*jitter); - int dh = (oh*jitter); - - if(!augmentation_calculated || !track) - { - augmentation_calculated = 1; - r1 = random_float(); - r2 = random_float(); - r3 = random_float(); - r4 = random_float(); - - dhue = rand_uniform_strong(-hue, hue); - dsat = rand_scale(saturation); - dexp = rand_scale(exposure); - - flip = use_flip ? random_gen() % 2 : 0; - blur = rand_int(0, 1) ? (use_blur) : 0; - } - - int pleft = rand_precalc_random(-dw, dw, r1); - int pright = rand_precalc_random(-dw, dw, r2); - int ptop = rand_precalc_random(-dh, dh, r3); - int pbot = rand_precalc_random(-dh, dh, r4); - - int swidth = ow - pleft - pright; - int sheight = oh - ptop - pbot; - - float sx = (float)swidth / ow; - float sy = (float)sheight / oh; - - float dx = ((float)pleft/ow)/sx; - float dy = ((float)ptop /oh)/sy; - - fill_truth_detection(filename, boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, w, h); - - image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp, - blur, boxes, d.y.vals[i]); - - d.X.vals[i] = ai.data; - - if(show_imgs) - { - char buff[1000]; - sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg(random_paths[i]), random_gen()); - int t; - for (t = 0; t < boxes; ++t) { - box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1); - if (!b.x) break; - int left = (b.x - b.w / 2.)*ai.w; - int right = (b.x + b.w / 2.)*ai.w; - int top = (b.y - b.h / 2.)*ai.h; - int bot = (b.y + b.h / 2.)*ai.h; - draw_box_width(ai, left, top, right, bot, 3, 150, 100, 50); // 3 channels RGB + int flag = (c >= 3); + mat_cv *src; + src = load_image_mat_cv(filename, flag); + if (src == NULL) { + if (check_mistakes) getchar(); + continue; } - save_image(ai, buff); - if (show_imgs == 1) { - show_image(ai, buff); - wait_until_press_key_cv(); - } - printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Click on window and press ESC button \n"); - } + int oh = get_height_mat(src); + int ow = get_width_mat(src); - release_mat(&src); + int dw = (ow*jitter); + int dh = (oh*jitter); + + if (!augmentation_calculated || !track) + { + augmentation_calculated = 1; + r1 = random_float(); + r2 = random_float(); + r3 = random_float(); + r4 = random_float(); + + dhue = rand_uniform_strong(-hue, hue); + dsat = rand_scale(saturation); + dexp = rand_scale(exposure); + + flip = use_flip ? random_gen() % 2 : 0; + blur = rand_int(0, 1) ? (use_blur) : 0; + } + + int pleft = rand_precalc_random(-dw, dw, r1); + int pright = rand_precalc_random(-dw, dw, r2); + int ptop = rand_precalc_random(-dh, dh, r3); + int pbot = rand_precalc_random(-dh, dh, r4); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + float dx = ((float)pleft / ow) / sx; + float dy = ((float)ptop / oh) / sy; + + fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. / sy, w, h); + + image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp, + blur, boxes, d.y.vals[i]); + + if (i_mixup) { + image old_img = ai; + old_img.data = d.X.vals[i]; + //show_image(ai, "new"); + //show_image(old_img, "old"); + //wait_until_press_key_cv(); + blend_images_cv(ai, 0.5, old_img, 0.5); + blend_truth(truth, boxes, d.y.vals[i]); + } + + d.X.vals[i] = ai.data; + memcpy(d.y.vals[i], truth, 5*boxes * sizeof(float)); + + if (show_imgs)// && i_mixup) // delete i_mixup + { + image tmp_ai = copy_image(ai); + char buff[1000]; + sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg(filename), random_gen()); + int t; + for (t = 0; t < boxes; ++t) { + box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1); + if (!b.x) break; + int left = (b.x - b.w / 2.)*ai.w; + int right = (b.x + b.w / 2.)*ai.w; + int top = (b.y - b.h / 2.)*ai.h; + int bot = (b.y + b.h / 2.)*ai.h; + draw_box_width(tmp_ai, left, top, right, bot, 1, 150, 100, 50); // 3 channels RGB + } + + save_image(tmp_ai, buff); + if (show_imgs == 1) { + show_image(tmp_ai, buff); + wait_until_press_key_cv(); + } + printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Click on window and press ESC button \n"); + free_image(tmp_ai); + } + + release_mat(&src); + free(truth); + } } free(random_paths); + if(mixup_random_paths) free(mixup_random_paths); return d; } #else // OPENCV -data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, float jitter, +void blend_images(image new_img, float alpha, image old_img, float beta) +{ + int i; + int data_size = new_img.w * new_img.h * new_img.c; + #pragma omp parallel for + for (i = 0; i < data_size; ++i) + new_img.data[i] = new_img.data[i] * alpha + old_img.data[i] * beta; +} + +data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int show_imgs) { + const int random_index = random_gen(); c = c ? c : 3; char **random_paths; + char **mixup_random_paths = NULL; if(track) random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed); else random_paths = get_random_paths(paths, n, m); + + int mixup = use_mixup ? random_gen() % 2 : 0; + //printf("\n mixup = %d \n", mixup); + if (mixup) { + if (track) mixup_random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed); + else mixup_random_paths = get_random_paths(paths, n, m); + } + int i; data d = { 0 }; d.shallow = 0; @@ -902,82 +975,104 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo int augmentation_calculated = 0; d.y = make_matrix(n, 5 * boxes); - for (i = 0; i < n; ++i) { - image orig = load_image(random_paths[i], 0, 0, c); + int i_mixup = 0; + for (i_mixup = 0; i_mixup <= mixup; i_mixup++) { + if (i_mixup) augmentation_calculated = 0; + for (i = 0; i < n; ++i) { + float *truth = (float*)calloc(5 * boxes, sizeof(float)); + char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i]; - int oh = orig.h; - int ow = orig.w; + image orig = load_image(filename, 0, 0, c); - int dw = (ow*jitter); - int dh = (oh*jitter); + int oh = orig.h; + int ow = orig.w; - if (!augmentation_calculated || !track) - { - augmentation_calculated = 1; - r1 = random_float(); - r2 = random_float(); - r3 = random_float(); - r4 = random_float(); + int dw = (ow*jitter); + int dh = (oh*jitter); - dhue = rand_uniform_strong(-hue, hue); - dsat = rand_scale(saturation); - dexp = rand_scale(exposure); + if (!augmentation_calculated || !track) + { + augmentation_calculated = 1; + r1 = random_float(); + r2 = random_float(); + r3 = random_float(); + r4 = random_float(); - flip = use_flip ? random_gen() % 2 : 0; - } + dhue = rand_uniform_strong(-hue, hue); + dsat = rand_scale(saturation); + dexp = rand_scale(exposure); - int pleft = rand_precalc_random(-dw, dw, r1); - int pright = rand_precalc_random(-dw, dw, r2); - int ptop = rand_precalc_random(-dh, dh, r3); - int pbot = rand_precalc_random(-dh, dh, r4); - - int swidth = ow - pleft - pright; - int sheight = oh - ptop - pbot; - - float sx = (float)swidth / ow; - float sy = (float)sheight / oh; - - image cropped = crop_image(orig, pleft, ptop, swidth, sheight); - - float dx = ((float)pleft / ow) / sx; - float dy = ((float)ptop / oh) / sy; - - image sized = resize_image(cropped, w, h); - if (flip) flip_image(sized); - distort_image(sized, dhue, dsat, dexp); - //random_distort_image(sized, hue, saturation, exposure); - d.X.vals[i] = sized.data; - - fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, w, h); - - if(show_imgs) - { - char buff[1000]; - sprintf(buff, "aug_%s_%d", basecfg(random_paths[i]), random_gen()); - int t; - for (t = 0; t < boxes; ++t) { - box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1); - if (!b.x) break; - int left = (b.x - b.w / 2.)*sized.w; - int right = (b.x + b.w / 2.)*sized.w; - int top = (b.y - b.h / 2.)*sized.h; - int bot = (b.y + b.h / 2.)*sized.h; - draw_box_width(sized, left, top, right, bot, 3, 150, 100, 50); // 3 channels RGB + flip = use_flip ? random_gen() % 2 : 0; } - show_image(sized, buff); - if (show_imgs == 1) { + int pleft = rand_precalc_random(-dw, dw, r1); + int pright = rand_precalc_random(-dw, dw, r2); + int ptop = rand_precalc_random(-dh, dh, r3); + int pbot = rand_precalc_random(-dh, dh, r4); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft / ow) / sx; + float dy = ((float)ptop / oh) / sy; + + image sized = resize_image(cropped, w, h); + if (flip) flip_image(sized); + distort_image(sized, dhue, dsat, dexp); + //random_distort_image(sized, hue, saturation, exposure); + + fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. / sy, w, h); + + if (i_mixup) { + image old_img = sized; + old_img.data = d.X.vals[i]; + //show_image(sized, "new"); + //show_image(old_img, "old"); + //wait_until_press_key_cv(); + blend_images(sized, 0.5, old_img, 0.5); + blend_truth(truth, boxes, d.y.vals[i]); + } + + d.X.vals[i] = sized.data; + memcpy(d.y.vals[i], truth, 5 * boxes * sizeof(float)); + + if (show_imgs)// && i_mixup) + { + char buff[1000]; + sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg(filename), random_gen()); + + int t; + for (t = 0; t < boxes; ++t) { + box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1); + if (!b.x) break; + int left = (b.x - b.w / 2.)*sized.w; + int right = (b.x + b.w / 2.)*sized.w; + int top = (b.y - b.h / 2.)*sized.h; + int bot = (b.y + b.h / 2.)*sized.h; + draw_box_width(sized, left, top, right, bot, 1, 150, 100, 50); // 3 channels RGB + } + save_image(sized, buff); - wait_until_press_key_cv(); + if (show_imgs == 1) { + show_image(sized, buff); + wait_until_press_key_cv(); + } + printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Press Enter: \n"); + //getchar(); } - printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Press Enter: \n"); - getchar(); - } - free_image(orig); - free_image(cropped); + free_image(orig); + free_image(cropped); + free(truth); + } } free(random_paths); + if (mixup_random_paths) free(mixup_random_paths); return d; } #endif // OPENCV @@ -1002,7 +1097,7 @@ void *load_thread(void *ptr) } else if (a.type == REGION_DATA){ *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); } else if (a.type == DETECTION_DATA){ - *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.blur, a.jitter, + *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.blur, a.mixup, a.jitter, a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.show_imgs); } else if (a.type == SWAG_DATA){ *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); diff --git a/src/data.h b/src/data.h index 3305db50..b99b4e9a 100644 --- a/src/data.h +++ b/src/data.h @@ -86,7 +86,7 @@ void print_letters(float *pred, int n); data load_data_captcha(char **paths, int n, int m, int k, int w, int h); data load_data_captcha_encode(char **paths, int n, int m, int w, int h); data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); -data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, float jitter, +data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int show_imgs); data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); diff --git a/src/detector.c b/src/detector.c index c9386914..c77a5889 100644 --- a/src/detector.c +++ b/src/detector.c @@ -129,6 +129,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i args.angle = net.angle; args.blur = net.blur; + args.mixup = net.mixup; args.exposure = net.exposure; args.saturation = net.saturation; args.hue = net.hue; diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index d6f726c3..b60f03cb 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -1238,6 +1238,14 @@ image image_data_augmentation(mat_cv* mat, int w, int h, return out; } +// blend two images with (alpha and beta) +void blend_images_cv(image new_img, float alpha, image old_img, float beta) +{ + cv::Mat new_mat(cv::Size(new_img.w, new_img.h), CV_32FC(new_img.c), new_img.data);// , size_t step = AUTO_STEP) + cv::Mat old_mat(cv::Size(old_img.w, old_img.h), CV_32FC(old_img.c), old_img.data); + cv::addWeighted(new_mat, alpha, old_mat, beta, 0.0, new_mat); +} + // ==================================================================== // Show Anchors // ==================================================================== diff --git a/src/image_opencv.h b/src/image_opencv.h index 21ee6939..2af5f24e 100644 --- a/src/image_opencv.h +++ b/src/image_opencv.h @@ -98,6 +98,9 @@ image image_data_augmentation(mat_cv* mat, int w, int h, float jitter, float dhue, float dsat, float dexp, int blur, int num_boxes, float *truth); +// blend two images with (alpha and beta) +void blend_images_cv(image new_img, float alpha, image old_img, float beta); + // Show Anchors void show_acnhors(int number_of_boxes, int num_of_clusters, float *rel_width_height_array, model anchors_data, int width, int height); diff --git a/src/parser.c b/src/parser.c index e285448c..b37b2873 100644 --- a/src/parser.c +++ b/src/parser.c @@ -708,6 +708,7 @@ void parse_net_options(list *options, network *net) net->min_crop = option_find_int_quiet(options, "min_crop",net->w); net->flip = option_find_int_quiet(options, "flip", 1); net->blur = option_find_int_quiet(options, "blur", 0); + net->mixup = option_find_int_quiet(options, "mixup", 0); net->angle = option_find_float_quiet(options, "angle", 0); net->aspect = option_find_float_quiet(options, "aspect", 1);