mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Some stream fixes
This commit is contained in:
@ -26,6 +26,7 @@ int *cuda_make_int_array(size_t n);
|
|||||||
void cuda_push_array(float *x_gpu, float *x, size_t n);
|
void cuda_push_array(float *x_gpu, float *x, size_t n);
|
||||||
void cuda_pull_array(float *x_gpu, float *x, size_t n);
|
void cuda_pull_array(float *x_gpu, float *x, size_t n);
|
||||||
void cuda_set_device(int n);
|
void cuda_set_device(int n);
|
||||||
|
int cuda_get_device();
|
||||||
void cuda_free(float *x_gpu);
|
void cuda_free(float *x_gpu);
|
||||||
void cuda_random(float *x_gpu, size_t n);
|
void cuda_random(float *x_gpu, size_t n);
|
||||||
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
|
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
|
||||||
|
@ -51,6 +51,7 @@ void forward_network_gpu(network net, network_state state)
|
|||||||
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
|
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
|
||||||
}
|
}
|
||||||
l.forward_gpu(l, state);
|
l.forward_gpu(l, state);
|
||||||
|
cudaStreamSynchronize(get_cuda_stream());
|
||||||
state.input = l.output_gpu;
|
state.input = l.output_gpu;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -392,6 +393,7 @@ float *get_network_output_gpu(network net)
|
|||||||
|
|
||||||
float *network_predict_gpu(network net, float *input)
|
float *network_predict_gpu(network net, float *input)
|
||||||
{
|
{
|
||||||
|
if (net.gpu_index != cuda_get_device())
|
||||||
cuda_set_device(net.gpu_index);
|
cuda_set_device(net.gpu_index);
|
||||||
int size = get_network_input_size(net) * net.batch;
|
int size = get_network_input_size(net) * net.batch;
|
||||||
network_state state;
|
network_state state;
|
||||||
|
@ -158,7 +158,6 @@ int main(int argc, char *argv[])
|
|||||||
det_image = detector.mat_to_image_resize(cur_frame);
|
det_image = detector.mat_to_image_resize(cur_frame);
|
||||||
result_vec = thread_result_vec;
|
result_vec = thread_result_vec;
|
||||||
result_vec = detector.tracking(result_vec); // comment it - if track_id is not required
|
result_vec = detector.tracking(result_vec); // comment it - if track_id is not required
|
||||||
|
|
||||||
#ifdef TRACK_OPTFLOW
|
#ifdef TRACK_OPTFLOW
|
||||||
// track optical flow
|
// track optical flow
|
||||||
if (track_optflow_queue.size() > 0) {
|
if (track_optflow_queue.size() > 0) {
|
||||||
@ -189,7 +188,7 @@ int main(int argc, char *argv[])
|
|||||||
//std::vector<bbox_t> result;
|
//std::vector<bbox_t> result;
|
||||||
auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true
|
auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true
|
||||||
//Sleep(200);
|
//Sleep(200);
|
||||||
Sleep(50);
|
//Sleep(50);
|
||||||
++fps_det_counter;
|
++fps_det_counter;
|
||||||
std::unique_lock<std::mutex> lock(mtx);
|
std::unique_lock<std::mutex> lock(mtx);
|
||||||
thread_result_vec = result;
|
thread_result_vec = result;
|
||||||
|
@ -34,7 +34,7 @@ struct detector_gpu_t{
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id)
|
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id)
|
||||||
{
|
{
|
||||||
int old_gpu_index;
|
int old_gpu_index;
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
@ -178,6 +178,7 @@ YOLODLL_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool
|
|||||||
int old_gpu_index;
|
int old_gpu_index;
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
cudaGetDevice(&old_gpu_index);
|
cudaGetDevice(&old_gpu_index);
|
||||||
|
if(cur_gpu_id != old_gpu_index)
|
||||||
cudaSetDevice(net.gpu_index);
|
cudaSetDevice(net.gpu_index);
|
||||||
#endif
|
#endif
|
||||||
//std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
|
//std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
|
||||||
@ -242,6 +243,7 @@ YOLODLL_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool
|
|||||||
free(sized.data);
|
free(sized.data);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
if (cur_gpu_id != old_gpu_index)
|
||||||
cudaSetDevice(old_gpu_index);
|
cudaSetDevice(old_gpu_index);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -47,6 +47,7 @@ struct image_t {
|
|||||||
class Detector {
|
class Detector {
|
||||||
std::shared_ptr<void> detector_gpu_ptr;
|
std::shared_ptr<void> detector_gpu_ptr;
|
||||||
std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
|
std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
|
||||||
|
const int cur_gpu_id;
|
||||||
public:
|
public:
|
||||||
float nms = .4;
|
float nms = .4;
|
||||||
|
|
||||||
@ -170,8 +171,8 @@ public:
|
|||||||
|
|
||||||
sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create();
|
sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create();
|
||||||
sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); // 15, 21, 31
|
sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); // 15, 21, 31
|
||||||
sync_PyrLKOpticalFlow_gpu->setMaxLevel(5); // +- 50 ptx
|
sync_PyrLKOpticalFlow_gpu->setMaxLevel(3); // +- 5 ptx
|
||||||
sync_PyrLKOpticalFlow_gpu->setNumIters(2000); // def: 30
|
sync_PyrLKOpticalFlow_gpu->setNumIters(1000); // def: 30
|
||||||
|
|
||||||
cv::cuda::setDevice(old_gpu_id);
|
cv::cuda::setDevice(old_gpu_id);
|
||||||
}
|
}
|
||||||
@ -190,10 +191,9 @@ public:
|
|||||||
void update_tracking_flow(cv::Mat src_mat)
|
void update_tracking_flow(cv::Mat src_mat)
|
||||||
{
|
{
|
||||||
int const old_gpu_id = cv::cuda::getDevice();
|
int const old_gpu_id = cv::cuda::getDevice();
|
||||||
|
if (old_gpu_id != gpu_id)
|
||||||
cv::cuda::setDevice(gpu_id);
|
cv::cuda::setDevice(gpu_id);
|
||||||
|
|
||||||
//cv::cuda::Stream stream;
|
|
||||||
|
|
||||||
if (src_mat.channels() == 3) {
|
if (src_mat.channels() == 3) {
|
||||||
if (src_mat_gpu.cols == 0) {
|
if (src_mat_gpu.cols == 0) {
|
||||||
src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type());
|
src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type());
|
||||||
@ -203,6 +203,7 @@ public:
|
|||||||
src_mat_gpu.upload(src_mat, stream);
|
src_mat_gpu.upload(src_mat, stream);
|
||||||
cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream);
|
cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream);
|
||||||
}
|
}
|
||||||
|
if (old_gpu_id != gpu_id)
|
||||||
cv::cuda::setDevice(old_gpu_id);
|
cv::cuda::setDevice(old_gpu_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,19 +216,18 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
int const old_gpu_id = cv::cuda::getDevice();
|
int const old_gpu_id = cv::cuda::getDevice();
|
||||||
|
if(old_gpu_id != gpu_id)
|
||||||
cv::cuda::setDevice(gpu_id);
|
cv::cuda::setDevice(gpu_id);
|
||||||
|
|
||||||
//cv::cuda::Stream stream;
|
|
||||||
|
|
||||||
if (dst_mat_gpu.cols == 0) {
|
if (dst_mat_gpu.cols == 0) {
|
||||||
dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
|
dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
|
||||||
dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
|
dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
|
||||||
tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
|
tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
dst_mat_gpu.upload(dst_mat, stream);
|
dst_mat_gpu.upload(dst_mat, stream);
|
||||||
|
|
||||||
|
|
||||||
cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream);
|
cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream);
|
||||||
|
|
||||||
if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
|
if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
|
||||||
@ -237,6 +237,8 @@ public:
|
|||||||
return cur_bbox_vec;
|
return cur_bbox_vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//return cur_bbox_vec;
|
||||||
|
|
||||||
cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu;
|
cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu;
|
||||||
|
|
||||||
for (auto &i : cur_bbox_vec) {
|
for (auto &i : cur_bbox_vec) {
|
||||||
@ -298,6 +300,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (old_gpu_id != gpu_id)
|
||||||
cv::cuda::setDevice(old_gpu_id);
|
cv::cuda::setDevice(old_gpu_id);
|
||||||
|
|
||||||
return result_bbox_vec;
|
return result_bbox_vec;
|
||||||
|
Reference in New Issue
Block a user