Improved FPS and and memory leak for ./darknet detector demo ... (bug in OpenCV - allocates memory for each thread without deallocation)

This commit is contained in:
AlexeyAB
2020-04-18 16:08:23 +03:00
parent 5dc7d8074d
commit 56a7ccf07e
3 changed files with 110 additions and 41 deletions

View File

@ -15,7 +15,6 @@
#include <sys/time.h>
#endif
#ifdef OPENCV
#include "http_stream.h"
@ -52,44 +51,77 @@ mat_cv* show_img;
static volatile int flag_exit;
static int letter_box = 0;
void *fetch_in_thread(void *ptr)
static const int thread_wait_ms = 1;
static volatile int run_fetch_in_thread = 0;
static volatile int run_detect_in_thread = 0;
int fetch_in_thread(void *ptr)
{
int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream
if(letter_box)
in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream);
else
in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream);
if(!in_s.data){
printf("Stream closed.\n");
flag_exit = 1;
//exit(EXIT_FAILURE);
return 0;
while (!custom_atomic_load_int(&flag_exit)) {
while (!custom_atomic_load_int(&run_fetch_in_thread)) {
if (custom_atomic_load_int(&flag_exit)) return 0;
this_thread_yield();
}
int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream
if (letter_box)
in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream);
else
in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream);
if (!in_s.data) {
printf("Stream closed.\n");
flag_exit = 1;
//exit(EXIT_FAILURE);
return 0;
}
//in_s = resize_image(in, net.w, net.h);
custom_atomic_store_int(&run_fetch_in_thread, 0);
}
return 0;
}
int fetch_in_thread_sync(void *ptr)
{
custom_atomic_store_int(&run_fetch_in_thread, 1);
while (custom_atomic_load_int(&run_fetch_in_thread)) this_thread_sleep_for(thread_wait_ms);
}
int detect_in_thread(void *ptr)
{
while (!custom_atomic_load_int(&flag_exit)) {
while (!custom_atomic_load_int(&run_detect_in_thread)) {
if (custom_atomic_load_int(&flag_exit)) return 0;
this_thread_yield();
}
layer l = net.layers[net.n - 1];
float *X = det_s.data;
float *prediction = network_predict(net, X);
memcpy(predictions[demo_index], prediction, l.outputs * sizeof(float));
mean_arrays(predictions, NFRAMES, l.outputs, avg);
l.output = avg;
cv_images[demo_index] = det_img;
det_img = cv_images[(demo_index + NFRAMES / 2 + 1) % NFRAMES];
demo_index = (demo_index + 1) % NFRAMES;
if (letter_box)
dets = get_network_boxes(&net, get_width_mat(in_img), get_height_mat(in_img), demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box
else
dets = get_network_boxes(&net, net.w, net.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized
custom_atomic_store_int(&run_detect_in_thread, 0);
}
//in_s = resize_image(in, net.w, net.h);
return 0;
}
void *detect_in_thread(void *ptr)
int detect_in_thread_sync(void *ptr)
{
layer l = net.layers[net.n-1];
float *X = det_s.data;
float *prediction = network_predict(net, X);
memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
mean_arrays(predictions, NFRAMES, l.outputs, avg);
l.output = avg;
cv_images[demo_index] = det_img;
det_img = cv_images[(demo_index + NFRAMES / 2 + 1) % NFRAMES];
demo_index = (demo_index + 1) % NFRAMES;
if (letter_box)
dets = get_network_boxes(&net, get_width_mat(in_img), get_height_mat(in_img), demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box
else
dets = get_network_boxes(&net, net.w, net.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized
return 0;
custom_atomic_store_int(&run_detect_in_thread, 1);
while (custom_atomic_load_int(&run_detect_in_thread)) this_thread_sleep_for(thread_wait_ms);
}
double get_wall_time()
@ -158,20 +190,22 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
custom_thread_t fetch_thread = NULL;
custom_thread_t detect_thread = NULL;
if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
if (custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
fetch_in_thread(0);
fetch_in_thread_sync(0); //fetch_in_thread(0);
det_img = in_img;
det_s = in_s;
fetch_in_thread(0);
detect_in_thread(0);
fetch_in_thread_sync(0); //fetch_in_thread(0);
detect_in_thread_sync(0); //fetch_in_thread(0);
det_img = in_img;
det_s = in_s;
for (j = 0; j < NFRAMES / 2; ++j) {
free_detections(dets, nboxes);
fetch_in_thread(0);
detect_in_thread(0);
fetch_in_thread_sync(0); //fetch_in_thread(0);
detect_in_thread_sync(0); //fetch_in_thread(0);
det_img = in_img;
det_s = in_s;
}
@ -213,9 +247,10 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
const float nms = .45; // 0.4F
int local_nboxes = nboxes;
detection *local_dets = dets;
this_thread_yield();
if (!benchmark) if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
if(custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
if (!benchmark) custom_atomic_store_int(&run_fetch_in_thread, 1); // if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
custom_atomic_store_int(&run_detect_in_thread, 1); // if (custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
//if (nms) do_nms_obj(local_dets, local_nboxes, l.classes, nms); // bad results
if (nms) {
@ -285,9 +320,9 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
printf("\n cvWriteFrame \n");
}
custom_join(detect_thread, 0);
while (custom_atomic_load_int(&run_detect_in_thread)) this_thread_sleep_for(thread_wait_ms); // custom_join(detect_thread, 0);
if (!benchmark) {
custom_join(fetch_thread, 0);
while (custom_atomic_load_int(&run_fetch_in_thread)) this_thread_sleep_for(thread_wait_ms); // custom_join(fetch_thread, 0);
free_image(det_s);
}
@ -332,6 +367,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
printf("output_video_writer closed. \n");
}
this_thread_sleep_for(thread_wait_ms);
custom_join(detect_thread, 0);
custom_join(fetch_thread, 0);
// free memory
free_image(in_s);
free_detections(dets, nboxes);

View File

@ -14,6 +14,7 @@
#include <memory>
#include <mutex>
#include <thread>
#include <atomic>
#include <ctime>
using std::cerr;
using std::endl;
@ -705,6 +706,29 @@ int custom_join(custom_thread_t tid, void **value_ptr)
return -1;
}
int custom_atomic_load_int(volatile int* obj)
{
const volatile std::atomic<int>* ptr_a = (const volatile std::atomic<int>*)obj;
return std::atomic_load(ptr_a);
}
void custom_atomic_store_int(volatile int* obj, int desr)
{
volatile std::atomic<int>* ptr_a = (volatile std::atomic<int>*)obj;
std::atomic_store(ptr_a, desr);
}
void this_thread_sleep_for(int ms_time)
{
std::chrono::milliseconds dura(ms_time);
std::this_thread::sleep_for(dura);
}
void this_thread_yield()
{
std::this_thread::yield();
}
#else // C++11
#include <iostream>

View File

@ -24,6 +24,11 @@ typedef void* custom_attr_t;
int custom_create_thread(custom_thread_t * tid, const custom_attr_t * attr, void *(*func) (void *), void *arg);
int custom_join(custom_thread_t thread, void **value_ptr);
int custom_atomic_load_int(volatile int* obj);
void custom_atomic_store_int(volatile int* obj, int desr);
void this_thread_sleep_for(int ms_time);
void this_thread_yield();
#ifdef __cplusplus
}
#endif