diff --git a/Makefile b/Makefile index d06e6ace..c59088af 100644 --- a/Makefile +++ b/Makefile @@ -111,7 +111,7 @@ CFLAGS+= -DCUDNN_HALF ARCH+= -gencode arch=compute_70,code=[sm_70,compute_70] endif -OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o +OBJ=http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o diff --git a/README.md b/README.md index 245a0f06..c605ce1f 100644 --- a/README.md +++ b/README.md @@ -76,9 +76,9 @@ You can get cfg-files by path: `darknet/cfg/` ##### Examples of results -[![Everything Is AWESOME](http://img.youtube.com/vi/VOC3huqHrss/0.jpg)](https://www.youtube.com/watch?v=VOC3huqHrss "Everything Is AWESOME") +[![Yolo v3](http://img.youtube.com/vi/VOC3huqHrss/0.jpg)](https://www.youtube.com/watch?v=MPU2HistivI "Yolo v3") -Others: https://www.youtube.com/channel/UC7ev3hNVkx4DzZ3LO19oebg +Others: https://www.youtube.com/user/pjreddie/videos ### Improvements in this repository @@ -344,11 +344,13 @@ Training Yolo v3: 5. You should label each object on images from your dataset. Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark -It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: ` ` +It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: + +` ` Where: * `` - integer object number from `0` to `(classes-1)` - * ` ` - float values relative to width and height of image, it can be equal from (0.0 to 1.0] + * ` ` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` * for example: ` = / ` or ` = / ` * atention: ` ` - are center of rectangle (are not top-left corner) diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index 1858f063..d629cac5 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -133,7 +133,7 @@ true true C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(CUDNN)\include;$(cudnn)\include - CUDNN_HALF;OPENCV;CUDNN;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + OPENCV;CUDNN_HALF;CUDNN;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y CompileAsCpp @@ -188,8 +188,8 @@ - + @@ -251,7 +251,7 @@ - + diff --git a/build/darknet/x64/cfg/crnn.train.cfg b/build/darknet/x64/cfg/crnn.train.cfg index dcc12d74..e0e0b54c 100644 --- a/build/darknet/x64/cfg/crnn.train.cfg +++ b/build/darknet/x64/cfg/crnn.train.cfg @@ -11,8 +11,16 @@ policy=steps steps=1000,1500 scales=.1,.1 -[rnn] +try_fix_nan=1 + +[connected] +output=256 +activation=leaky + +[crnn] batch_normalize=1 +size=1 +pad=0 output = 1024 hidden=1024 activation=leaky diff --git a/build/darknet/yolo_console_dll.vcxproj b/build/darknet/yolo_console_dll.vcxproj index a965fcaa..c93844b7 100644 --- a/build/darknet/yolo_console_dll.vcxproj +++ b/build/darknet/yolo_console_dll.vcxproj @@ -51,8 +51,7 @@ MultiByte - - + @@ -93,7 +92,7 @@ _CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) - C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib + C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)\lib\x64\;$(ZED_SDK_ROOT_DIR)\lib @@ -117,7 +116,7 @@ true true true - ..\..\include;C:\opencv_source\opencv\bin\install\include + ..\..\include;C:\opencv_source\opencv\bin\install\include;$(CUDA_PATH)\include;$(ZED_SDK_ROOT_DIR)\include OPENCV;_CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) Async true @@ -125,7 +124,8 @@ true true - C:\opencv_source\opencv\bin\install\x64\vc14\lib;C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib + C:\opencv_source\opencv\bin\install\x64\vc14\lib;C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)\lib\x64\;$(ZED_SDK_ROOT_DIR)\lib + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) @@ -138,6 +138,5 @@ - - + \ No newline at end of file diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index 97ff65c6..c03bbea3 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -102,7 +102,7 @@ true $(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) $(OutDir)\$(TargetName)$(TargetExt) - ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) + ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cuda.lib;%(AdditionalDependencies) true @@ -142,8 +142,7 @@ c++1y CompileAsCpp Default - - + OPENCV; true @@ -151,7 +150,7 @@ true true C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) - ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) + ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cuda.lib;%(AdditionalDependencies) $(OutDir)\$(TargetName)$(TargetExt) @@ -192,8 +191,8 @@ - + @@ -257,7 +256,7 @@ - + diff --git a/cfg/crnn.train.cfg b/cfg/crnn.train.cfg index dcc12d74..e0e0b54c 100644 --- a/cfg/crnn.train.cfg +++ b/cfg/crnn.train.cfg @@ -11,8 +11,16 @@ policy=steps steps=1000,1500 scales=.1,.1 -[rnn] +try_fix_nan=1 + +[connected] +output=256 +activation=leaky + +[crnn] batch_normalize=1 +size=1 +pad=0 output = 1024 hidden=1024 activation=leaky diff --git a/include/darknet.h b/include/darknet.h index 8ca8b493..0a1451e3 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -792,10 +792,11 @@ LIB_API void free_data(data d); LIB_API pthread_t load_data(load_args args); LIB_API pthread_t load_data_in_thread(load_args args); -// cuda.h +// dark_cuda.h LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); LIB_API void cuda_pull_array_async(float *x_gpu, float *x, size_t n); LIB_API void cuda_set_device(int n); +LIB_API void *cuda_get_context(); // utils.h LIB_API void free_ptrs(void **ptrs, int n); @@ -809,6 +810,8 @@ LIB_API metadata get_metadata(char *file); // http_stream.h +LIB_API void delete_json_sender(); +LIB_API void send_json_custom(char const* send_buf, int port, int timeout); LIB_API double get_time_point(); void start_timer(); void stop_timer(); diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp index efe2b5f3..f9278721 100644 --- a/include/yolo_v2_class.hpp +++ b/include/yolo_v2_class.hpp @@ -25,6 +25,7 @@ struct bbox_t { unsigned int obj_id; // class of object - from range [0, classes-1] unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) unsigned int frames_counter; // counter of frames on which the object was detected + float x_3d, y_3d, z_3d; // center of object (in Meters) if ZED 3D Camera is used }; struct image_t { @@ -60,8 +61,8 @@ extern "C" LIB_API int get_device_name(int gpu, char* deviceName); class Detector { std::shared_ptr detector_gpu_ptr; std::deque> prev_bbox_vec_deque; - const int cur_gpu_id; public: + const int cur_gpu_id; float nms = .4; bool wait_stream; @@ -79,6 +80,11 @@ public: LIB_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, int const frames_story = 5, int const max_dist = 40); + LIB_API void *get_cuda_context(); + + LIB_API bool send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, + std::string filename = "", int timeout = 400000, int port = 8070); + std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false) { if (img.data == NULL) @@ -115,7 +121,10 @@ public: static std::shared_ptr mat_to_image(cv::Mat img_src) { cv::Mat img; - cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); + if (img_src.channels() == 4) cv::cvtColor(img_src, img, cv::COLOR_RGBA2BGR); + else if (img_src.channels() == 3) cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); + else if (img_src.channels() == 1) cv::cvtColor(img_src, img, cv::COLOR_GRAY2BGR); + else std::cerr << " Warning: img_src.channels() is not 1, 3 or 4. It is = " << img_src.channels() << std::endl; std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; }); std::shared_ptr ipl_small = std::make_shared(img); *image_ptr = ipl_to_image(ipl_small.get()); @@ -166,7 +175,7 @@ private: #endif // OPENCV }; - +// -------------------------------------------------------------------------------- #if defined(TRACK_OPTFLOW) && defined(OPENCV) && defined(GPU) @@ -183,7 +192,7 @@ public: const int flow_error; - Tracker_optflow(int _gpu_id = 0, int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : + Tracker_optflow(int _gpu_id = 0, int win_size = 15, int max_level = 3, int iterations = 8000, int _flow_error = -1) : gpu_count(cv::cuda::getCudaEnabledDeviceCount()), gpu_id(std::min(_gpu_id, gpu_count-1)), flow_error((_flow_error > 0)? _flow_error:(win_size*4)) { @@ -249,18 +258,32 @@ public: if (old_gpu_id != gpu_id) cv::cuda::setDevice(gpu_id); - if (src_mat.channels() == 3) { + if (src_mat.channels() == 1 || src_mat.channels() == 3 || src_mat.channels() == 4) { if (src_mat_gpu.cols == 0) { src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type()); src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1); } - update_cur_bbox_vec(_cur_bbox_vec); + if (src_mat.channels() == 1) { + src_mat_gpu.upload(src_mat, stream); + src_mat_gpu.copyTo(src_grey_gpu); + } + else if (src_mat.channels() == 3) { + src_mat_gpu.upload(src_mat, stream); + cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); + } + else if (src_mat.channels() == 4) { + src_mat_gpu.upload(src_mat, stream); + cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGRA2GRAY, 1, stream); + } + else { + std::cerr << " Warning: src_mat.channels() is not: 1, 3 or 4. It is = " << src_mat.channels() << " \n"; + return; + } - //src_grey_gpu.upload(src_mat, stream); // use BGR - src_mat_gpu.upload(src_mat, stream); - cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); } + update_cur_bbox_vec(_cur_bbox_vec); + if (old_gpu_id != gpu_id) cv::cuda::setDevice(old_gpu_id); } @@ -355,7 +378,7 @@ public: const int flow_error; - Tracker_optflow(int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : + Tracker_optflow(int win_size = 15, int max_level = 3, int iterations = 8000, int _flow_error = -1) : flow_error((_flow_error > 0)? _flow_error:(win_size*4)) { sync_PyrLKOpticalFlow = cv::SparsePyrLKOpticalFlow::create(); @@ -396,12 +419,20 @@ public: void update_tracking_flow(cv::Mat new_src_mat, std::vector _cur_bbox_vec) { - if (new_src_mat.channels() == 3) { - - update_cur_bbox_vec(_cur_bbox_vec); - + if (new_src_mat.channels() == 1) { + src_grey = new_src_mat.clone(); + } + else if (new_src_mat.channels() == 3) { cv::cvtColor(new_src_mat, src_grey, CV_BGR2GRAY, 1); } + else if (new_src_mat.channels() == 4) { + cv::cvtColor(new_src_mat, src_grey, CV_BGRA2GRAY, 1); + } + else { + std::cerr << " Warning: new_src_mat.channels() is not: 1, 3 or 4. It is = " << new_src_mat.channels() << " \n"; + return; + } + update_cur_bbox_vec(_cur_bbox_vec); } @@ -416,6 +447,7 @@ public: if (src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols) { src_grey = dst_grey.clone(); + //std::cerr << " Warning: src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols \n"; return cur_bbox_vec; } @@ -611,56 +643,361 @@ public: } } }; + + +class track_kalman_t +{ + int track_id_counter; + std::chrono::steady_clock::time_point global_last_time; + float dT; + +public: + int max_objects; // max objects for tracking + int min_frames; // min frames to consider an object as detected + const float max_dist; // max distance (in px) to track with the same ID + cv::Size img_size; // max value of x,y,w,h + + struct tst_t { + int track_id; + int state_id; + std::chrono::steady_clock::time_point last_time; + int detection_count; + tst_t() : track_id(-1), state_id(-1) {} + }; + std::vector track_id_state_id_time; + std::vector result_vec_pred; + + struct one_kalman_t; + std::vector kalman_vec; + + struct one_kalman_t + { + cv::KalmanFilter kf; + cv::Mat state; + cv::Mat meas; + int measSize, stateSize, contrSize; + + void set_delta_time(float dT) { + kf.transitionMatrix.at(2) = dT; + kf.transitionMatrix.at(9) = dT; + } + + void set(bbox_t box) + { + initialize_kalman(); + + kf.errorCovPre.at(0) = 1; // px + kf.errorCovPre.at(7) = 1; // px + kf.errorCovPre.at(14) = 1; + kf.errorCovPre.at(21) = 1; + kf.errorCovPre.at(28) = 1; // px + kf.errorCovPre.at(35) = 1; // px + + state.at(0) = box.x; + state.at(1) = box.y; + state.at(2) = 0; + state.at(3) = 0; + state.at(4) = box.w; + state.at(5) = box.h; + // <<<< Initialization + + kf.statePost = state; + } + + // Kalman.correct() calculates: statePost = statePre + gain * (z(k)-measurementMatrix*statePre); + // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) + void correct(bbox_t box) { + meas.at(0) = box.x; + meas.at(1) = box.y; + meas.at(2) = box.w; + meas.at(3) = box.h; + + kf.correct(meas); + + bbox_t new_box = predict(); + if (new_box.w == 0 || new_box.h == 0) { + set(box); + //std::cerr << " force set(): track_id = " << box.track_id << + // ", x = " << box.x << ", y = " << box.y << ", w = " << box.w << ", h = " << box.h << std::endl; + } + } + + // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; + // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) + bbox_t predict() { + bbox_t box; + state = kf.predict(); + + box.x = state.at(0); + box.y = state.at(1); + box.w = state.at(4); + box.h = state.at(5); + return box; + } + + void initialize_kalman() + { + kf = cv::KalmanFilter(stateSize, measSize, contrSize, CV_32F); + + // Transition State Matrix A + // Note: set dT at each processing step! + // [ 1 0 dT 0 0 0 ] + // [ 0 1 0 dT 0 0 ] + // [ 0 0 1 0 0 0 ] + // [ 0 0 0 1 0 0 ] + // [ 0 0 0 0 1 0 ] + // [ 0 0 0 0 0 1 ] + cv::setIdentity(kf.transitionMatrix); + + // Measure Matrix H + // [ 1 0 0 0 0 0 ] + // [ 0 1 0 0 0 0 ] + // [ 0 0 0 0 1 0 ] + // [ 0 0 0 0 0 1 ] + kf.measurementMatrix = cv::Mat::zeros(measSize, stateSize, CV_32F); + kf.measurementMatrix.at(0) = 1.0f; + kf.measurementMatrix.at(7) = 1.0f; + kf.measurementMatrix.at(16) = 1.0f; + kf.measurementMatrix.at(23) = 1.0f; + + // Process Noise Covariance Matrix Q - result smoother with lower values (1e-2) + // [ Ex 0 0 0 0 0 ] + // [ 0 Ey 0 0 0 0 ] + // [ 0 0 Ev_x 0 0 0 ] + // [ 0 0 0 Ev_y 0 0 ] + // [ 0 0 0 0 Ew 0 ] + // [ 0 0 0 0 0 Eh ] + //cv::setIdentity(kf.processNoiseCov, cv::Scalar(1e-3)); + kf.processNoiseCov.at(0) = 1e-2; + kf.processNoiseCov.at(7) = 1e-2; + kf.processNoiseCov.at(14) = 1e-2;// 5.0f; + kf.processNoiseCov.at(21) = 1e-2;// 5.0f; + kf.processNoiseCov.at(28) = 1e-2; + kf.processNoiseCov.at(35) = 1e-2; + + // Measures Noise Covariance Matrix R - result smoother with higher values (1e-1) + cv::setIdentity(kf.measurementNoiseCov, cv::Scalar(1e-1)); + + //cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); + // <<<< Kalman Filter + + set_delta_time(0); + } + + + one_kalman_t(int _stateSize = 6, int _measSize = 4, int _contrSize = 0) : + kf(_stateSize, _measSize, _contrSize, CV_32F), measSize(_measSize), stateSize(_stateSize), contrSize(_contrSize) + { + state = cv::Mat(stateSize, 1, CV_32F); // [x,y,v_x,v_y,w,h] + meas = cv::Mat(measSize, 1, CV_32F); // [z_x,z_y,z_w,z_h] + //cv::Mat procNoise(stateSize, 1, type) + // [E_x,E_y,E_v_x,E_v_y,E_w,E_h] + + initialize_kalman(); + } + }; + // ------------------------------------------ + + + + track_kalman_t(int _max_objects = 1000, int _min_frames = 3, float _max_dist = 40, cv::Size _img_size = cv::Size(10000, 10000)) : + max_objects(_max_objects), min_frames(_min_frames), max_dist(_max_dist), img_size(_img_size), + track_id_counter(0) + { + kalman_vec.resize(max_objects); + track_id_state_id_time.resize(max_objects); + result_vec_pred.resize(max_objects); + } + + float calc_dt() { + dT = std::chrono::duration(std::chrono::steady_clock::now() - global_last_time).count(); + return dT; + } + + static float get_distance(float src_x, float src_y, float dst_x, float dst_y) { + return sqrtf((src_x - dst_x)*(src_x - dst_x) + (src_y - dst_y)*(src_y - dst_y)); + } + + void clear_old_states() { + // clear old bboxes + for (size_t state_id = 0; state_id < track_id_state_id_time.size(); ++state_id) + { + float time_sec = std::chrono::duration(std::chrono::steady_clock::now() - track_id_state_id_time[state_id].last_time).count(); + float time_wait = 0.5; // 0.5 second + if (track_id_state_id_time[state_id].track_id > -1) + { + if ((result_vec_pred[state_id].x > img_size.width) || + (result_vec_pred[state_id].y > img_size.height)) + { + track_id_state_id_time[state_id].track_id = -1; + } + + if (time_sec >= time_wait || track_id_state_id_time[state_id].detection_count < 0) { + //std::cerr << " remove track_id = " << track_id_state_id_time[state_id].track_id << ", state_id = " << state_id << std::endl; + track_id_state_id_time[state_id].track_id = -1; // remove bbox + } + } + } + } + + tst_t get_state_id(bbox_t find_box, std::vector &busy_vec) + { + tst_t tst; + tst.state_id = -1; + + float min_dist = std::numeric_limits::max(); + + for (size_t i = 0; i < max_objects; ++i) + { + if (track_id_state_id_time[i].track_id > -1 && result_vec_pred[i].obj_id == find_box.obj_id && busy_vec[i] == false) + { + bbox_t pred_box = result_vec_pred[i]; + + float dist = get_distance(pred_box.x, pred_box.y, find_box.x, find_box.y); + + float movement_dist = std::max(max_dist, static_cast(std::max(pred_box.w, pred_box.h)) ); + + if ((dist < movement_dist) && (dist < min_dist)) { + min_dist = dist; + tst.state_id = i; + } + } + } + + if (tst.state_id > -1) { + track_id_state_id_time[tst.state_id].last_time = std::chrono::steady_clock::now(); + track_id_state_id_time[tst.state_id].detection_count = std::max(track_id_state_id_time[tst.state_id].detection_count + 2, 10); + tst = track_id_state_id_time[tst.state_id]; + busy_vec[tst.state_id] = true; + } + else { + //std::cerr << " Didn't find: obj_id = " << find_box.obj_id << ", x = " << find_box.x << ", y = " << find_box.y << + // ", track_id_counter = " << track_id_counter << std::endl; + } + + return tst; + } + + tst_t new_state_id(std::vector &busy_vec) + { + tst_t tst; + // find empty cell to add new track_id + auto it = std::find_if(track_id_state_id_time.begin(), track_id_state_id_time.end(), [&](tst_t &v) { return v.track_id == -1; }); + if (it != track_id_state_id_time.end()) { + it->state_id = it - track_id_state_id_time.begin(); + //it->track_id = track_id_counter++; + it->track_id = 0; + it->last_time = std::chrono::steady_clock::now(); + it->detection_count = 1; + tst = *it; + busy_vec[it->state_id] = true; + } + + return tst; + } + + std::vector find_state_ids(std::vector result_vec) + { + std::vector tst_vec(result_vec.size()); + + std::vector busy_vec(max_objects, false); + + for (size_t i = 0; i < result_vec.size(); ++i) + { + tst_t tst = get_state_id(result_vec[i], busy_vec); + int state_id = tst.state_id; + int track_id = tst.track_id; + + // if new state_id + if (state_id < 0) { + tst = new_state_id(busy_vec); + state_id = tst.state_id; + track_id = tst.track_id; + if (state_id > -1) { + kalman_vec[state_id].set(result_vec[i]); + //std::cerr << " post: "; + } + } + + //std::cerr << " track_id = " << track_id << ", state_id = " << state_id << + // ", x = " << result_vec[i].x << ", det_count = " << tst.detection_count << std::endl; + + if (state_id > -1) { + tst_vec[i] = tst; + result_vec_pred[state_id] = result_vec[i]; + result_vec_pred[state_id].track_id = track_id; + } + } + + return tst_vec; + } + + std::vector predict() + { + clear_old_states(); + std::vector result_vec; + + for (size_t i = 0; i < max_objects; ++i) + { + tst_t tst = track_id_state_id_time[i]; + if (tst.track_id > -1) { + bbox_t box = kalman_vec[i].predict(); + + result_vec_pred[i].x = box.x; + result_vec_pred[i].y = box.y; + result_vec_pred[i].w = box.w; + result_vec_pred[i].h = box.h; + + if (tst.detection_count >= min_frames) + { + if (track_id_state_id_time[i].track_id == 0) { + track_id_state_id_time[i].track_id = ++track_id_counter; + result_vec_pred[i].track_id = track_id_counter; + } + + result_vec.push_back(result_vec_pred[i]); + } + } + } + //std::cerr << " result_vec.size() = " << result_vec.size() << std::endl; + + //global_last_time = std::chrono::steady_clock::now(); + + return result_vec; + } + + + std::vector correct(std::vector result_vec) + { + calc_dt(); + clear_old_states(); + + for (size_t i = 0; i < max_objects; ++i) + track_id_state_id_time[i].detection_count--; + + std::vector tst_vec = find_state_ids(result_vec); + + for (size_t i = 0; i < tst_vec.size(); ++i) { + tst_t tst = tst_vec[i]; + int state_id = tst.state_id; + if (state_id > -1) + { + kalman_vec[state_id].set_delta_time(dT); + kalman_vec[state_id].correct(result_vec_pred[state_id]); + } + } + + result_vec = predict(); + + global_last_time = std::chrono::steady_clock::now(); + + return result_vec; + } + +}; +// ---------------------------------------------- #endif // OPENCV -//extern "C" { #endif // __cplusplus -/* - // C - wrappers - LIB_API void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id); - LIB_API void delete_detector(); - LIB_API bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size); - LIB_API bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size); - LIB_API bbox_t* detect(image_t img, int *result_size); - LIB_API image_t load_img(char *image_filename); - LIB_API void free_img(image_t m); - -#ifdef __cplusplus -} // extern "C" - -static std::shared_ptr c_detector_ptr; -static std::vector c_result_vec; - -void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id) { - c_detector_ptr = std::make_shared(cfg_filename, weight_filename, gpu_id); -} - -void delete_detector() { c_detector_ptr.reset(); } - -bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size) { - c_result_vec = static_cast(c_detector_ptr.get())->detect(img, thresh, use_mean); - *result_size = c_result_vec.size(); - return c_result_vec.data(); -} - -bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size) { - c_result_vec = static_cast(c_detector_ptr.get())->detect_resized(img, init_w, init_h, thresh, use_mean); - *result_size = c_result_vec.size(); - return c_result_vec.data(); -} - -bbox_t* detect(image_t img, int *result_size) { - return detect_custom(img, 0.24, true, result_size); -} - -image_t load_img(char *image_filename) { - return static_cast(c_detector_ptr.get())->load_image(image_filename); -} -void free_img(image_t m) { - static_cast(c_detector_ptr.get())->free_image(m); -} - -#endif // __cplusplus -*/ -#endif +#endif // YOLO_V2_CLASS_HPP diff --git a/scripts/README.md b/scripts/README.md index 0c8327f0..36985f81 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -16,6 +16,12 @@ Cityscapes: https://www.cityscapes-dataset.com/ Object Tracking Benchmark: http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html +MOT (Multiple object tracking benchmark): https://motchallenge.net/ + +VOT (Visual object tracking): http://www.votchallenge.net/challenges.html + +FREE FLIR Thermal Dataset (infrared): https://www.flir.eu/oem/adas/adas-dataset-form/ + MARS: http://www.liangzheng.com.cn/Project/project_mars.html Market-1501: http://www.liangzheng.org/Project/project_reid.html @@ -30,7 +36,22 @@ Visual Question Answering: https://visualqa.org/download.html Large Movie Review Dataset: http://ai.stanford.edu/~amaas/data/sentiment/ +KITTI (for autonomous driving): http://www.cvlibs.net/datasets/kitti/ + +nuScenes (for autonomous driving): https://www.nuscenes.org/overview + +---- + +Wikipedia's List of datasets: https://en.wikipedia.org/wiki/List_of_datasets_for_machine-learning_research Other datasets (Music, Natural Images, Artificial Datasets, Faces, Text, Speech, Recommendation Systems, Misc): http://deeplearning.net/datasets/ -25 datasets: https://www.analyticsvidhya.com/blog/2018/03/comprehensive-collection-deep-learning-datasets/ \ No newline at end of file +25 datasets: https://www.analyticsvidhya.com/blog/2018/03/comprehensive-collection-deep-learning-datasets/ + +List of datasets: https://riemenschneider.hayko.at/vision/dataset/index.php + +Another list of datasets: http://homepages.inf.ed.ac.uk/rbf/CVonline/Imagedbase.htm + +Pedestrian DATASETs for Vision based Detection and Tracking: https://hemprasad.wordpress.com/2014/11/08/pedestrian-datasets-for-vision-based-detection-and-tracking/ + +TrackingNet: https://tracking-net.org/ \ No newline at end of file diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 0144ca51..6c9445a6 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -4,7 +4,7 @@ #include "cublas_v2.h" #include "activations.h" -#include "cuda.h" +#include "dark_cuda.h" __device__ float lhtan_activate_kernel(float x) diff --git a/src/activation_layer.c b/src/activation_layer.c index 2c323b8d..b8b5d023 100644 --- a/src/activation_layer.c +++ b/src/activation_layer.c @@ -1,6 +1,6 @@ #include "activation_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/activations.h b/src/activations.h index 4ecf97d7..4a382b2d 100644 --- a/src/activations.h +++ b/src/activations.h @@ -1,7 +1,7 @@ #ifndef ACTIVATIONS_H #define ACTIVATIONS_H #include "darknet.h" -#include "cuda.h" +#include "dark_cuda.h" #include "math.h" //typedef enum{ diff --git a/src/avgpool_layer.c b/src/avgpool_layer.c index bae5ff4d..20838bbd 100644 --- a/src/avgpool_layer.c +++ b/src/avgpool_layer.c @@ -1,5 +1,5 @@ #include "avgpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) diff --git a/src/avgpool_layer.h b/src/avgpool_layer.h index f7679aa9..2277ec6d 100644 --- a/src/avgpool_layer.h +++ b/src/avgpool_layer.h @@ -2,7 +2,7 @@ #define AVGPOOL_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/avgpool_layer_kernels.cu b/src/avgpool_layer_kernels.cu index b38ba450..676faa69 100644 --- a/src/avgpool_layer_kernels.cu +++ b/src/avgpool_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "avgpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) { diff --git a/src/blas.h b/src/blas.h index 12d4b9a3..8e91fff2 100644 --- a/src/blas.h +++ b/src/blas.h @@ -1,7 +1,7 @@ #ifndef BLAS_H #define BLAS_H #ifdef GPU -#include "cuda.h" +#include "dark_cuda.h" #include "tree.h" #endif #ifdef __cplusplus diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 98592c81..2070bc1f 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -4,7 +4,7 @@ #include #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include "tree.h" diff --git a/src/classifier.c b/src/classifier.c index b5c78d65..5471957a 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -5,7 +5,7 @@ #include "blas.h" #include "assert.h" #include "classifier.h" -#include "cuda.h" +#include "dark_cuda.h" #ifdef WIN32 #include #include "gettimeofday.h" diff --git a/src/col2im_kernels.cu b/src/col2im_kernels.cu index e3b1d233..2f18a0fd 100644 --- a/src/col2im_kernels.cu +++ b/src/col2im_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "col2im.h" -#include "cuda.h" +#include "dark_cuda.h" // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE diff --git a/src/connected_layer.c b/src/connected_layer.c index 3520e914..242ab8fb 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -2,7 +2,7 @@ #include "batchnorm_layer.h" #include "convolutional_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 9625ddc6..8ae99710 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -2,12 +2,6 @@ #include "curand.h" #include "cublas_v2.h" -#ifdef CUDNN -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "cudnn.lib") -#endif -#endif - #include "convolutional_layer.h" #include "batchnorm_layer.h" #include "gemm.h" @@ -15,7 +9,7 @@ #include "im2col.h" #include "col2im.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void binarize_kernel(float *x, int n, float *binary) @@ -598,6 +592,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) void backward_convolutional_layer_gpu(convolutional_layer l, network_state state) { + if(state.net.try_fix_nan) constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); if (!l.batch_normalize) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 756fe917..e42e86a7 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -8,12 +8,6 @@ #include #include -#ifdef CUDNN -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "cudnn.lib") -#endif -#endif - #ifdef AI2 #include "xnor_layer.h" #endif diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index bad3b84e..bfc1229f 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -1,7 +1,7 @@ #ifndef CONVOLUTIONAL_LAYER_H #define CONVOLUTIONAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" diff --git a/src/cost_layer.c b/src/cost_layer.c index 33fd8572..09188106 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -1,6 +1,6 @@ #include "cost_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include #include diff --git a/src/crnn_layer.c b/src/crnn_layer.c index 0fc6da72..59cac509 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -1,7 +1,7 @@ #include "crnn_layer.h" #include "convolutional_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" @@ -268,16 +268,18 @@ void forward_crnn_layer_gpu(layer l, network_state state) layer input_layer = *(l.input_layer); layer self_layer = *(l.self_layer); layer output_layer = *(l.output_layer); - /* -#ifdef CUDNN_HALF -// slow and bad - s.index = state.index; - s.net = state.net; - cuda_convert_f32_to_f16(input_layer.weights_gpu, input_layer.c*input_layer.n*input_layer.size*input_layer.size, input_layer.weights_gpu16); - cuda_convert_f32_to_f16(self_layer.weights_gpu, self_layer.c*self_layer.n*self_layer.size*self_layer.size, self_layer.weights_gpu16); - cuda_convert_f32_to_f16(output_layer.weights_gpu, output_layer.c*output_layer.n*output_layer.size*output_layer.size, output_layer.weights_gpu16); + +/* +#ifdef CUDNN_HALF // slow and bad for training + if (!state.train && state.net.cudnn_half) { + s.index = state.index; + cuda_convert_f32_to_f16(input_layer.weights_gpu, input_layer.c*input_layer.n*input_layer.size*input_layer.size, input_layer.weights_gpu16); + cuda_convert_f32_to_f16(self_layer.weights_gpu, self_layer.c*self_layer.n*self_layer.size*self_layer.size, self_layer.weights_gpu16); + cuda_convert_f32_to_f16(output_layer.weights_gpu, output_layer.c*output_layer.n*output_layer.size*output_layer.size, output_layer.weights_gpu16); + } #endif //CUDNN_HALF - */ +*/ + if (state.train) { fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); diff --git a/src/crop_layer.c b/src/crop_layer.c index 258030be..092237f4 100644 --- a/src/crop_layer.c +++ b/src/crop_layer.c @@ -1,5 +1,5 @@ #include "crop_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include image get_crop_image(crop_layer l) diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu index 5b084fe5..46afca6c 100644 --- a/src/crop_layer_kernels.cu +++ b/src/crop_layer_kernels.cu @@ -4,7 +4,7 @@ #include "crop_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" __device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) diff --git a/src/cuda.c b/src/dark_cuda.c similarity index 96% rename from src/cuda.c rename to src/dark_cuda.c index 9e7745e2..42aa6fc8 100644 --- a/src/cuda.c +++ b/src/dark_cuda.c @@ -14,6 +14,15 @@ int gpu_index = 0; #include "assert.h" #include #include +#include + +#pragma comment(lib, "cuda.lib") + +#ifdef CUDNN +#ifndef USE_CMAKE_LIBS +#pragma comment(lib, "cudnn.lib") +#endif // USE_CMAKE_LIBS +#endif // CUDNN void cuda_set_device(int n) { @@ -30,6 +39,14 @@ int cuda_get_device() return n; } +void *cuda_get_context() +{ + CUcontext pctx; + CUresult status = cuCtxGetCurrent(&pctx); + if(status != CUDA_SUCCESS) fprintf(stderr, " Error: cuCtxGetCurrent() is failed \n"); + return (void *)pctx; +} + void check_error(cudaError_t status) { cudaError_t status2 = cudaGetLastError(); diff --git a/src/cuda.h b/src/dark_cuda.h similarity index 96% rename from src/cuda.h rename to src/dark_cuda.h index fe33a258..34614dc1 100644 --- a/src/cuda.h +++ b/src/dark_cuda.h @@ -18,11 +18,12 @@ extern int gpu_index; #define WARP_SIZE 32 #define BLOCK_TRANSPOSE32 256 +#include #include #include #include #include -#include +//#include #ifdef CUDNN #include @@ -87,4 +88,4 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line #else // GPU //LIB_API void cuda_set_device(int n); #endif // GPU -#endif // CUDA_H +#endif // DARKCUDA_H diff --git a/src/darknet.c b/src/darknet.c index bc7a7b7d..c10a7b2f 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -8,7 +8,7 @@ #include "darknet.h" #include "parser.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "connected_layer.h" diff --git a/src/data.c b/src/data.c index e6fa95c5..e364ed76 100644 --- a/src/data.c +++ b/src/data.c @@ -1,7 +1,7 @@ #include "data.h" #include "utils.h" #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include diff --git a/src/deconvolutional_kernels.cu b/src/deconvolutional_kernels.cu index b0ba1a86..472b2247 100644 --- a/src/deconvolutional_kernels.cu +++ b/src/deconvolutional_kernels.cu @@ -9,7 +9,7 @@ #include "im2col.h" #include "col2im.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) { diff --git a/src/deconvolutional_layer.h b/src/deconvolutional_layer.h index 2e1c4527..bb15a429 100644 --- a/src/deconvolutional_layer.h +++ b/src/deconvolutional_layer.h @@ -1,7 +1,7 @@ #ifndef DECONVOLUTIONAL_LAYER_H #define DECONVOLUTIONAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" diff --git a/src/detection_layer.c b/src/detection_layer.c index 60fdc90d..64d133f9 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -3,7 +3,7 @@ #include "softmax_layer.h" #include "blas.h" #include "box.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include #include diff --git a/src/detector.c b/src/detector.c index e82cc069..96222379 100644 --- a/src/detector.c +++ b/src/detector.c @@ -923,6 +923,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa printf("\n detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count); + int* detection_per_class_count = (int*)calloc(classes, sizeof(int)); + for (j = 0; j < detections_count; ++j) { + detection_per_class_count[detections[j].class_id]++; + } + int* truth_flags = (int*)calloc(unique_truth_count, sizeof(int)); int rank; @@ -945,7 +950,8 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa { truth_flags[d.unique_truth_index] = 1; pr[d.class_id][rank].tp++; // true-positive - } + } else + pr[d.class_id][rank].fp++; } else { pr[d.class_id][rank].fp++; // false-positive @@ -963,6 +969,10 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa if ((tp + fn) > 0) pr[i][rank].recall = (double)tp / (double)(tp + fn); else pr[i][rank].recall = 0; + + if (rank == (detections_count - 1) && detection_per_class_count[i] != (tp + fp)) { // check for last rank + printf(" class_id: %d - detections = %d, tp+fp = %d, tp = %d, fp = %d \n", i, detection_per_class_count[i], tp+fp, tp, fp); + } } } @@ -1014,6 +1024,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa free(pr); free(detections); free(truth_classes_count); + free(detection_per_class_count); fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); if (reinforcement_fd != NULL) fclose(reinforcement_fd); diff --git a/src/dropout_layer.c b/src/dropout_layer.c index 9eb22982..3c2abed4 100644 --- a/src/dropout_layer.c +++ b/src/dropout_layer.c @@ -1,6 +1,6 @@ #include "dropout_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include diff --git a/src/dropout_layer_kernels.cu b/src/dropout_layer_kernels.cu index f6a93c91..ceba0fa3 100644 --- a/src/dropout_layer_kernels.cu +++ b/src/dropout_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "dropout_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) diff --git a/src/gemm.c b/src/gemm.c index b110da61..fec1e16a 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -1,7 +1,7 @@ #include "gemm.h" #include "utils.h" #include "im2col.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include #include diff --git a/src/gru_layer.c b/src/gru_layer.c index eac751a5..29acdaa2 100644 --- a/src/gru_layer.c +++ b/src/gru_layer.c @@ -1,7 +1,7 @@ #include "gru_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/http_stream.cpp b/src/http_stream.cpp index b911f22f..e61e506d 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -1,7 +1,8 @@ +#define _XOPEN_SOURCE #include "image.h" #include "http_stream.h" -#ifdef OPENCV + // // a single-threaded, multi client(using select), debug webserver - streaming out mjpg. // on win, _WIN32 has to be defined, must link against ws2_32.lib (socks on linux are for free) @@ -11,6 +12,8 @@ #include #include #include +#include +#include using std::cerr; using std::endl; @@ -87,6 +90,219 @@ static int close_socket(SOCKET s) { #endif // _WIN32 +class JSON_sender +{ + SOCKET sock; + SOCKET maxfd; + fd_set master; + int timeout; // master sock timeout, shutdown after timeout usec. + int close_all_sockets; + + int _write(int sock, char const*const s, int len) + { + if (len < 1) { len = strlen(s); } + return ::send(sock, s, len, 0); + } + +public: + + JSON_sender(int port = 0, int _timeout = 400000) + : sock(INVALID_SOCKET) + , timeout(_timeout) + { + close_all_sockets = 0; + FD_ZERO(&master); + if (port) + open(port); + } + + ~JSON_sender() + { + close_all(); + release(); + } + + bool release() + { + if (sock != INVALID_SOCKET) + ::shutdown(sock, 2); + sock = (INVALID_SOCKET); + return false; + } + + void close_all() + { + close_all_sockets = 1; + write("\n]"); // close JSON array + } + + bool open(int port) + { + sock = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + SOCKADDR_IN address; + address.sin_addr.s_addr = INADDR_ANY; + address.sin_family = AF_INET; + address.sin_port = htons(port); // ::htons(port); + int reuse = 1; + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof(reuse)) < 0) + cerr << "setsockopt(SO_REUSEADDR) failed" << endl; + +#ifdef SO_REUSEPORT + if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (const char*)&reuse, sizeof(reuse)) < 0) + cerr << "setsockopt(SO_REUSEPORT) failed" << endl; +#endif + if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) + { + cerr << "error JSON_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; + return release(); + } + if (::listen(sock, 10) == SOCKET_ERROR) + { + cerr << "error JSON_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; + return release(); + } + FD_ZERO(&master); + FD_SET(sock, &master); + maxfd = sock; + return true; + } + + bool isOpened() + { + return sock != INVALID_SOCKET; + } + + bool write(char const* outputbuf) + { + fd_set rread = master; + struct timeval select_timeout = { 0, 0 }; + struct timeval socket_timeout = { 0, timeout }; + if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) + return true; // nothing broken, there's just noone listening + + size_t outlen = strlen(outputbuf); + +#ifdef _WIN32 + for (unsigned i = 0; iclient ? maxfd : client); + FD_SET(client, &master); + _write(client, "HTTP/1.0 200 OK\r\n", 0); + _write(client, + "Server: Mozarella/2.2\r\n" + "Accept-Range: bytes\r\n" + "Connection: close\r\n" + "Max-Age: 0\r\n" + "Expires: 0\r\n" + "Cache-Control: no-cache, private\r\n" + "Pragma: no-cache\r\n" + "Content-Type: application/json\r\n" + //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" + "\r\n", 0); + _write(client, "[\n", 0); // open JSON array + int n = _write(client, outputbuf, outlen); + cerr << "JSON_sender: new client " << client << endl; + } + else // existing client, just stream pix + { + //char head[400]; + // application/x-resource+json or application/x-collection+json - when you are representing REST resources and collections + // application/json or text/json or text/javascript or text/plain. + // https://stackoverflow.com/questions/477816/what-is-the-correct-json-content-type + //sprintf(head, "\r\nContent-Length: %zu\r\n\r\n", outlen); + //sprintf(head, "--boundary\r\nContent-Type: application/json\r\nContent-Length: %zu\r\n\r\n", outlen); + //_write(s, head, 0); + if (!close_all_sockets) _write(s, ", \n", 0); + int n = _write(s, outputbuf, outlen); + if (n < outlen) + { + cerr << "JSON_sender: kill client " << s << endl; + ::shutdown(s, 2); + FD_CLR(s, &master); + } + + if (close_all_sockets) { + int result = close_socket(s); + cerr << "JSON_sender: close clinet: " << result << " \n"; + continue; + } + } + } + if (close_all_sockets) { + int result = close_socket(sock); + cerr << "JSON_sender: close acceptor: " << result << " \n\n"; + } + return true; + } +}; +// ---------------------------------------- + +static std::unique_ptr js_ptr; +static std::mutex mtx; + +void delete_json_sender() +{ + std::lock_guard lock(mtx); + js_ptr.release(); +} + +void send_json_custom(char const* send_buf, int port, int timeout) +{ + try { + std::lock_guard lock(mtx); + if(!js_ptr) js_ptr.reset(new JSON_sender(port, timeout)); + + js_ptr->write(send_buf); + } + catch (...) { + cerr << " Error in send_json_custom() function \n"; + } +} + +void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout) +{ + try { + char *send_buf = detection_to_json(dets, nboxes, classes, names, frame_id, NULL); + + send_json_custom(send_buf, port, timeout); + std::cout << " JSON-stream sent. \n"; + + free(send_buf); + } + catch (...) { + cerr << " Error in send_json() function \n"; + } +} +// ---------------------------------------- + + +#ifdef OPENCV + #include #include #include @@ -287,195 +503,6 @@ void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) } // ---------------------------------------- -class JSON_sender -{ - SOCKET sock; - SOCKET maxfd; - fd_set master; - int timeout; // master sock timeout, shutdown after timeout usec. - int close_all_sockets; - - int _write(int sock, char const*const s, int len) - { - if (len < 1) { len = strlen(s); } - return ::send(sock, s, len, 0); - } - -public: - - JSON_sender(int port = 0, int _timeout = 400000) - : sock(INVALID_SOCKET) - , timeout(_timeout) - { - close_all_sockets = 0; - FD_ZERO(&master); - if (port) - open(port); - } - - ~JSON_sender() - { - close_all(); - release(); - } - - bool release() - { - if (sock != INVALID_SOCKET) - ::shutdown(sock, 2); - sock = (INVALID_SOCKET); - return false; - } - - void close_all() - { - close_all_sockets = 1; - write("\n]"); // close JSON array - } - - bool open(int port) - { - sock = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - - SOCKADDR_IN address; - address.sin_addr.s_addr = INADDR_ANY; - address.sin_family = AF_INET; - address.sin_port = htons(port); // ::htons(port); - int reuse = 1; - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof(reuse)) < 0) - cerr << "setsockopt(SO_REUSEADDR) failed" << endl; - -#ifdef SO_REUSEPORT - if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (const char*)&reuse, sizeof(reuse)) < 0) - cerr << "setsockopt(SO_REUSEPORT) failed" << endl; -#endif - if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) - { - cerr << "error JSON_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; - return release(); - } - if (::listen(sock, 10) == SOCKET_ERROR) - { - cerr << "error JSON_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; - return release(); - } - FD_ZERO(&master); - FD_SET(sock, &master); - maxfd = sock; - return true; - } - - bool isOpened() - { - return sock != INVALID_SOCKET; - } - - bool write(char *outputbuf) - { - fd_set rread = master; - struct timeval select_timeout = { 0, 0 }; - struct timeval socket_timeout = { 0, timeout }; - if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) - return true; // nothing broken, there's just noone listening - - size_t outlen = strlen(outputbuf); - -#ifdef _WIN32 - for (unsigned i = 0; iclient ? maxfd : client); - FD_SET(client, &master); - _write(client, "HTTP/1.0 200 OK\r\n", 0); - _write(client, - "Server: Mozarella/2.2\r\n" - "Accept-Range: bytes\r\n" - "Connection: close\r\n" - "Max-Age: 0\r\n" - "Expires: 0\r\n" - "Cache-Control: no-cache, private\r\n" - "Pragma: no-cache\r\n" - "Content-Type: application/json\r\n" - //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" - "\r\n", 0); - _write(client, "[\n", 0); // open JSON array - int n = _write(client, outputbuf, outlen); - cerr << "JSON_sender: new client " << client << endl; - } - else // existing client, just stream pix - { - //char head[400]; - // application/x-resource+json or application/x-collection+json - when you are representing REST resources and collections - // application/json or text/json or text/javascript or text/plain. - // https://stackoverflow.com/questions/477816/what-is-the-correct-json-content-type - //sprintf(head, "\r\nContent-Length: %zu\r\n\r\n", outlen); - //sprintf(head, "--boundary\r\nContent-Type: application/json\r\nContent-Length: %zu\r\n\r\n", outlen); - //_write(s, head, 0); - if(!close_all_sockets) _write(s, ", \n", 0); - int n = _write(s, outputbuf, outlen); - if (n < outlen) - { - cerr << "JSON_sender: kill client " << s << endl; - ::shutdown(s, 2); - FD_CLR(s, &master); - } - - if (close_all_sockets) { - int result = close_socket(s); - cerr << "JSON_sender: close clinet: " << result << " \n"; - continue; - } - } - } - if (close_all_sockets) { - int result = close_socket(sock); - cerr << "JSON_sender: close acceptor: " << result << " \n\n"; - } - return true; - } -}; -// ---------------------------------------- - -void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout) -{ - try { - static JSON_sender js(port, timeout); - char *send_buf = detection_to_json(dets, nboxes, classes, names, frame_id, NULL); - - js.write(send_buf); - std::cout << " JSON-stream sent. \n"; - free(send_buf); - } - catch (...) { - cerr << " Error in send_json() function \n"; - } -} - -// ---------------------------------------- - CvCapture* get_capture_video_stream(const char *path) { CvCapture* cap = NULL; try { @@ -641,8 +668,7 @@ image load_image_resize(char *filename, int w, int h, int c, image *im) return out; } - -#endif // OPENCV +#endif // OPENCV // ----------------------------------------------------- diff --git a/src/im2col_kernels.cu b/src/im2col_kernels.cu index da952df5..539824a9 100644 --- a/src/im2col_kernels.cu +++ b/src/im2col_kernels.cu @@ -4,11 +4,10 @@ #include #include "im2col.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include -//#include template diff --git a/src/image.c b/src/image.c index dadaa1e5..72b4bdfb 100644 --- a/src/image.c +++ b/src/image.c @@ -1,7 +1,7 @@ #include "image.h" #include "utils.h" #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include #ifndef _USE_MATH_DEFINES #define _USE_MATH_DEFINES diff --git a/src/layer.c b/src/layer.c index 2361c1e0..ae87065f 100644 --- a/src/layer.c +++ b/src/layer.c @@ -1,5 +1,5 @@ #include "layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include void free_layer(layer l) diff --git a/src/local_layer.h b/src/local_layer.h index 198ca362..8ef0dbad 100644 --- a/src/local_layer.h +++ b/src/local_layer.h @@ -1,7 +1,7 @@ #ifndef LOCAL_LAYER_H #define LOCAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" diff --git a/src/lstm_layer.c b/src/lstm_layer.c index 2894d3d3..bf1e303b 100644 --- a/src/lstm_layer.c +++ b/src/lstm_layer.c @@ -1,7 +1,7 @@ #include "lstm_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index cf99e619..2e217459 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -1,5 +1,5 @@ #include "maxpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "gemm.h" #include @@ -32,8 +32,8 @@ void cudnn_maxpool_setup(layer *l) CUDNN_NOT_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN l->size, l->size, - 0, //l.pad, - 0, //l.pad, + l->pad/2, //0, //l.pad, + l->pad/2, //0, //l.pad, l->stride, l->stride); diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index ba6a2020..0a90c376 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -2,7 +2,7 @@ #define MAXPOOL_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index 29aa257c..36fccf32 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "maxpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) { diff --git a/src/network_kernels.cu b/src/network_kernels.cu index a40dae0a..298372df 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -1,7 +1,4 @@ -//#include "cuda_runtime.h" -//#include "curand.h" -//#include "cublas_v2.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include diff --git a/src/region_layer.c b/src/region_layer.c index 3221e77e..74c10e13 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -2,7 +2,7 @@ #include "activations.h" #include "blas.h" #include "box.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include #include diff --git a/src/reorg_layer.c b/src/reorg_layer.c index 47c5efa4..72d05773 100644 --- a/src/reorg_layer.c +++ b/src/reorg_layer.c @@ -1,5 +1,5 @@ #include "reorg_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/reorg_layer.h b/src/reorg_layer.h index b6ed379c..f2b90930 100644 --- a/src/reorg_layer.h +++ b/src/reorg_layer.h @@ -2,7 +2,7 @@ #define REORG_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/reorg_old_layer.c b/src/reorg_old_layer.c index defc034d..530da202 100644 --- a/src/reorg_old_layer.c +++ b/src/reorg_old_layer.c @@ -1,5 +1,5 @@ #include "reorg_old_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/reorg_old_layer.h b/src/reorg_old_layer.h index b66769b5..15c61f8d 100644 --- a/src/reorg_old_layer.h +++ b/src/reorg_old_layer.h @@ -2,7 +2,7 @@ #define REORG_OLD_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/rnn_layer.c b/src/rnn_layer.c index 22aade09..28163d75 100644 --- a/src/rnn_layer.c +++ b/src/rnn_layer.c @@ -1,7 +1,7 @@ #include "rnn_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/route_layer.c b/src/route_layer.c index 8bd6817a..b502fbe7 100644 --- a/src/route_layer.c +++ b/src/route_layer.c @@ -1,5 +1,5 @@ #include "route_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index b84b72f2..06cd6056 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -1,5 +1,5 @@ #include "shortcut_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include #include diff --git a/src/softmax_layer.c b/src/softmax_layer.c index 3cd607be..9bbff9a0 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -1,6 +1,6 @@ #include "softmax_layer.h" #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include "blas.h" diff --git a/src/upsample_layer.c b/src/upsample_layer.c index d31dd49e..1a2783c2 100644 --- a/src/upsample_layer.c +++ b/src/upsample_layer.c @@ -1,5 +1,5 @@ #include "upsample_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/upsample_layer.h b/src/upsample_layer.h index 68aff329..4461cb15 100644 --- a/src/upsample_layer.h +++ b/src/upsample_layer.h @@ -1,6 +1,6 @@ #ifndef UPSAMPLE_LAYER_H #define UPSAMPLE_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/utils.c b/src/utils.c index b4d17c23..904aa2e0 100644 --- a/src/utils.c +++ b/src/utils.c @@ -252,6 +252,8 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace_extension(output_path, ".BMP", ".txt", output_path); find_replace_extension(output_path, ".ppm", ".txt", output_path); find_replace_extension(output_path, ".PPM", ".txt", output_path); + find_replace_extension(output_path, ".tiff", ".txt", output_path); + find_replace_extension(output_path, ".TIFF", ".txt", output_path); } float sec(clock_t clocks) diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index be84f4d5..12ab208c 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -5,21 +5,146 @@ #include #include #include +#include #include -#include // std::mutex, std::unique_lock -#include // std::condition_variable +#include // std::mutex, std::unique_lock +#include // std::unordered_map -// To use tracking - uncomment the following line. Tracking is supported only by OpenCV 3.x +// It makes sense only for video-Camera (not for video-File) +// To use - uncomment the following line. Optical-flow is supported only by OpenCV 3.x - 4.x //#define TRACK_OPTFLOW +//#define GPU + +// To use 3D-stereo camera ZED - uncomment the following line. ZED_SDK should be installed. +//#define ZED_STEREO -//#include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.1\include\cuda_runtime.h" -//#pragma comment(lib, "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1/lib/x64/cudart.lib") -//static std::shared_ptr device_ptr(NULL, [](void *img) { cudaDeviceReset(); }); #include "yolo_v2_class.hpp" // imported functions from DLL #ifdef OPENCV +#ifdef ZED_STEREO +#include +#pragma comment(lib, "sl_core64.lib") +#pragma comment(lib, "sl_input64.lib") +#pragma comment(lib, "sl_zed64.lib") + +float getMedian(std::vector &v) { + size_t n = v.size() / 2; + std::nth_element(v.begin(), v.begin() + n, v.end()); + return v[n]; +} + +std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xyzrgba) +{ + bool valid_measure; + int i, j; + const int R_max = 4; + + std::vector bbox3d_vect; + + for (auto &cur_box : bbox_vect) { + + int center_i = cur_box.x + cur_box.w * 0.5f, center_j = cur_box.y + cur_box.h * 0.5f; + + std::vector x_vect, y_vect, z_vect; + for (int R = 0; R < R_max; R++) { + for (int y = -R; y <= R; y++) { + for (int x = -R; x <= R; x++) { + i = center_i + x; + j = center_j + y; + sl::float4 out(NAN, NAN, NAN, NAN); + if (i >= 0 && i < xyzrgba.cols && j >= 0 && j < xyzrgba.rows) { + cv::Vec4f &elem = xyzrgba.at(j, i); // x,y,z,w + out.x = elem[0]; + out.y = elem[1]; + out.z = elem[2]; + out.w = elem[3]; + } + valid_measure = std::isfinite(out.z); + if (valid_measure) + { + x_vect.push_back(out.x); + y_vect.push_back(out.y); + z_vect.push_back(out.z); + } + } + } + } + + if (x_vect.size() * y_vect.size() * z_vect.size() > 0) + { + cur_box.x_3d = getMedian(x_vect); + cur_box.y_3d = getMedian(y_vect); + cur_box.z_3d = getMedian(z_vect); + } + else { + cur_box.x_3d = NAN; + cur_box.y_3d = NAN; + cur_box.z_3d = NAN; + } + + bbox3d_vect.emplace_back(cur_box); + } + + return bbox3d_vect; +} + +cv::Mat slMat2cvMat(sl::Mat &input) { + // Mapping between MAT_TYPE and CV_TYPE + int cv_type = -1; + switch (input.getDataType()) { + case sl::MAT_TYPE_32F_C1: + cv_type = CV_32FC1; + break; + case sl::MAT_TYPE_32F_C2: + cv_type = CV_32FC2; + break; + case sl::MAT_TYPE_32F_C3: + cv_type = CV_32FC3; + break; + case sl::MAT_TYPE_32F_C4: + cv_type = CV_32FC4; + break; + case sl::MAT_TYPE_8U_C1: + cv_type = CV_8UC1; + break; + case sl::MAT_TYPE_8U_C2: + cv_type = CV_8UC2; + break; + case sl::MAT_TYPE_8U_C3: + cv_type = CV_8UC3; + break; + case sl::MAT_TYPE_8U_C4: + cv_type = CV_8UC4; + break; + default: + break; + } + return cv::Mat(input.getHeight(), input.getWidth(), cv_type, input.getPtr(sl::MEM_CPU)); +} + +cv::Mat zed_capture_rgb(sl::Camera &zed) { + sl::Mat left; + zed.retrieveImage(left); + return slMat2cvMat(left).clone(); +} + +cv::Mat zed_capture_3d(sl::Camera &zed) { + sl::Mat cur_cloud; + zed.retrieveMeasure(cur_cloud, sl::MEASURE_XYZ); + return slMat2cvMat(cur_cloud).clone(); +} + +static sl::Camera zed; // ZED-camera + +#else // ZED_STEREO +std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xyzrgba) { + return bbox_vect; +} +#endif // ZED_STEREO + + #include // C++ #include #ifndef CV_VERSION_EPOCH @@ -44,139 +169,6 @@ #endif // USE_CMAKE_LIBS #endif // CV_VERSION_EPOCH -class track_kalman { -public: - cv::KalmanFilter kf; - int state_size, meas_size, contr_size; - - - track_kalman(int _state_size = 10, int _meas_size = 10, int _contr_size = 0) - : state_size(_state_size), meas_size(_meas_size), contr_size(_contr_size) - { - kf.init(state_size, meas_size, contr_size, CV_32F); - - cv::setIdentity(kf.measurementMatrix); - cv::setIdentity(kf.measurementNoiseCov, cv::Scalar::all(1e-1)); - cv::setIdentity(kf.processNoiseCov, cv::Scalar::all(1e-5)); - cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); - cv::setIdentity(kf.transitionMatrix); - } - - void set(std::vector result_vec) { - for (size_t i = 0; i < result_vec.size() && i < state_size*2; ++i) { - kf.statePost.at(i * 2 + 0) = result_vec[i].x; - kf.statePost.at(i * 2 + 1) = result_vec[i].y; - } - } - - // Kalman.correct() calculates: statePost = statePre + gain * (z(k)-measurementMatrix*statePre); - // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) - std::vector correct(std::vector result_vec) { - cv::Mat measurement(meas_size, 1, CV_32F); - for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { - measurement.at(i * 2 + 0) = result_vec[i].x; - measurement.at(i * 2 + 1) = result_vec[i].y; - } - cv::Mat estimated = kf.correct(measurement); - for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { - result_vec[i].x = estimated.at(i * 2 + 0); - result_vec[i].y = estimated.at(i * 2 + 1); - } - return result_vec; - } - - // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; - // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) - std::vector predict() { - std::vector result_vec; - cv::Mat control; - cv::Mat prediction = kf.predict(control); - for (size_t i = 0; i < prediction.rows && i < state_size * 2; ++i) { - result_vec[i].x = prediction.at(i * 2 + 0); - result_vec[i].y = prediction.at(i * 2 + 1); - } - return result_vec; - } - -}; - - - - -class extrapolate_coords_t { -public: - std::vector old_result_vec; - std::vector dx_vec, dy_vec, time_vec; - std::vector old_dx_vec, old_dy_vec; - - void new_result(std::vector new_result_vec, float new_time) { - old_dx_vec = dx_vec; - old_dy_vec = dy_vec; - if (old_dx_vec.size() != old_result_vec.size()) std::cout << "old_dx != old_res \n"; - dx_vec = std::vector(new_result_vec.size(), 0); - dy_vec = std::vector(new_result_vec.size(), 0); - update_result(new_result_vec, new_time, false); - old_result_vec = new_result_vec; - time_vec = std::vector(new_result_vec.size(), new_time); - } - - void update_result(std::vector new_result_vec, float new_time, bool update = true) { - for (size_t i = 0; i < new_result_vec.size(); ++i) { - for (size_t k = 0; k < old_result_vec.size(); ++k) { - if (old_result_vec[k].track_id == new_result_vec[i].track_id && old_result_vec[k].obj_id == new_result_vec[i].obj_id) { - float const delta_time = new_time - time_vec[k]; - if (abs(delta_time) < 1) break; - size_t index = (update) ? k : i; - float dx = ((float)new_result_vec[i].x - (float)old_result_vec[k].x) / delta_time; - float dy = ((float)new_result_vec[i].y - (float)old_result_vec[k].y) / delta_time; - float old_dx = dx, old_dy = dy; - - // if it's shaking - if (update) { - if (dx * dx_vec[i] < 0) dx = dx / 2; - if (dy * dy_vec[i] < 0) dy = dy / 2; - } else { - if (dx * old_dx_vec[k] < 0) dx = dx / 2; - if (dy * old_dy_vec[k] < 0) dy = dy / 2; - } - dx_vec[index] = dx; - dy_vec[index] = dy; - - //if (old_dx == dx && old_dy == dy) std::cout << "not shakin \n"; - //else std::cout << "shakin \n"; - - if (dx_vec[index] > 1000 || dy_vec[index] > 1000) { - //std::cout << "!!! bad dx or dy, dx = " << dx_vec[index] << ", dy = " << dy_vec[index] << - // ", delta_time = " << delta_time << ", update = " << update << std::endl; - dx_vec[index] = 0; - dy_vec[index] = 0; - } - old_result_vec[k].x = new_result_vec[i].x; - old_result_vec[k].y = new_result_vec[i].y; - time_vec[k] = new_time; - break; - } - } - } - } - - std::vector predict(float cur_time) { - std::vector result_vec = old_result_vec; - for (size_t i = 0; i < old_result_vec.size(); ++i) { - float const delta_time = cur_time - time_vec[i]; - auto &bbox = result_vec[i]; - float new_x = (float) bbox.x + dx_vec[i] * delta_time; - float new_y = (float) bbox.y + dy_vec[i] * delta_time; - if (new_x > 0) bbox.x = new_x; - else bbox.x = 0; - if (new_y > 0) bbox.y = new_y; - else bbox.y = 0; - } - return result_vec; - } - -}; - void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector obj_names, int current_det_fps = -1, int current_cap_fps = -1) @@ -190,11 +182,22 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector 0) obj_name += " - " + std::to_string(i.track_id); cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0); - int const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); - cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 30, 0)), - cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)), + int max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); + std::string coords_3d; + if (!isnan(i.z_3d)) { + std::stringstream ss; + ss << std::fixed << std::setprecision(2) << "x:" << i.x_3d << "m y:" << i.y_3d << "m z:" << i.z_3d << "m "; + coords_3d = ss.str(); + cv::Size const text_size_3d = getTextSize(ss.str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, 1, 0); + int const max_width_3d = (text_size_3d.width > i.w + 2) ? text_size_3d.width : (i.w + 2); + if (max_width_3d > max_width) max_width = max_width_3d; + } + + cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 35, 0)), + cv::Point2f(std::min((int)i.x + max_width, mat_img.cols - 1), std::min((int)i.y, mat_img.rows - 1)), color, CV_FILLED, 8, 0); - putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); + putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 16), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); + if(!coords_3d.empty()) putText(mat_img, coords_3d, cv::Point2f(i.x, i.y-1), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); } } if (current_det_fps >= 0 && current_cap_fps >= 0) { @@ -205,7 +208,8 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector const result_vec, std::vector const obj_names) { +void show_console_result(std::vector const result_vec, std::vector const obj_names, int frame_id = -1) { + if (frame_id >= 0) std::cout << " Frame: " << frame_id << std::endl; for (auto &i : result_vec) { if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - "; std::cout << "obj_id = " << i.obj_id << ", x = " << i.x << ", y = " << i.y @@ -223,6 +227,38 @@ std::vector objects_names_from_file(std::string const filename) { return file_lines; } +template +class send_one_replaceable_object_t { + const bool sync; + std::atomic a_ptr; +public: + + void send(T const& _obj) { + T *new_ptr = new T; + *new_ptr = _obj; + if (sync) { + while (a_ptr.load()) std::this_thread::sleep_for(std::chrono::milliseconds(3)); + } + std::unique_ptr old_ptr(a_ptr.exchange(new_ptr)); + } + + T receive() { + std::unique_ptr ptr; + do { + while(!a_ptr.load()) std::this_thread::sleep_for(std::chrono::milliseconds(3)); + ptr.reset(a_ptr.exchange(NULL)); + } while (!ptr); + T obj = *ptr; + return obj; + } + + bool is_object_present() { + return (a_ptr.load() != NULL); + } + + send_one_replaceable_object_t(bool _sync) : sync(_sync), a_ptr(NULL) + {} +}; int main(int argc, char *argv[]) { @@ -239,17 +275,23 @@ int main(int argc, char *argv[]) } else if (argc > 1) filename = argv[1]; - float const thresh = (argc > 5) ? std::stof(argv[5]) : 0.20; + float const thresh = (argc > 5) ? std::stof(argv[5]) : 0.2; Detector detector(cfg_file, weights_file); auto obj_names = objects_names_from_file(names_file); std::string out_videofile = "result.avi"; - bool const save_output_videofile = true; -#ifdef TRACK_OPTFLOW + bool const save_output_videofile = false; // true - for history + bool const send_network = false; // true - for remote detection + bool const use_kalman_filter = false; // true - for stationary camera + + bool detection_sync = true; // true - for video-file +#ifdef TRACK_OPTFLOW // for slow GPU + detection_sync = false; Tracker_optflow tracker_flow; - detector.wait_stream = true; -#endif + //detector.wait_stream = true; +#endif // TRACK_OPTFLOW + while (true) { @@ -259,187 +301,318 @@ int main(int argc, char *argv[]) try { #ifdef OPENCV - extrapolate_coords_t extrapolate_coords; - bool extrapolate_flag = false; - float cur_time_extrapolate = 0, old_time_extrapolate = 0; preview_boxes_t large_preview(100, 150, false), small_preview(50, 50, true); bool show_small_boxes = false; std::string const file_ext = filename.substr(filename.find_last_of(".") + 1); std::string const protocol = filename.substr(0, 7); if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || // video file - protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/") // video network stream + protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/" || // video network stream + filename == "zed_camera" || file_ext == "svo" || filename == "web_camera") // ZED stereo camera + { - cv::Mat cap_frame, cur_frame, det_frame, write_frame; - std::queue track_optflow_queue; - int passed_flow_frames = 0; - std::shared_ptr det_image; - std::vector result_vec, thread_result_vec; - detector.nms = 0.02; // comment it - if track_id is not required - std::atomic consumed, videowrite_ready; - bool exit_flag = false; - consumed = true; - videowrite_ready = true; - std::atomic fps_det_counter, fps_cap_counter; - fps_det_counter = 0; - fps_cap_counter = 0; - int current_det_fps = 0, current_cap_fps = 0; - std::thread t_detect, t_cap, t_videowrite; - std::mutex mtx; - std::condition_variable cv_detected, cv_pre_tracked; + if (protocol == "rtsp://" || protocol == "http://" || protocol == "https:/" || filename == "zed_camera" || filename == "web_camera") + detection_sync = false; + + cv::Mat cur_frame; + std::atomic fps_cap_counter(0), fps_det_counter(0); + std::atomic current_fps_cap(0), current_fps_det(0); + std::atomic exit_flag(false); std::chrono::steady_clock::time_point steady_start, steady_end; - cv::VideoCapture cap(filename); cap >> cur_frame; - int const video_fps = cap.get(CV_CAP_PROP_FPS); + int video_fps = 25; + bool use_zed_camera = false; + + track_kalman_t track_kalman; + +#ifdef ZED_STEREO + sl::InitParameters init_params; + init_params.camera_resolution = sl::RESOLUTION_HD720; + init_params.coordinate_units = sl::UNIT_METER; + //init_params.sdk_cuda_ctx = (CUcontext)detector.get_cuda_context(); + init_params.sdk_gpu_id = detector.cur_gpu_id; + init_params.camera_buffer_count_linux = 2; + if (file_ext == "svo") init_params.svo_input_filename.set(filename.c_str()); + if (filename == "zed_camera" || file_ext == "svo") { + std::cout << "ZED 3D Camera " << zed.open(init_params) << std::endl; + cur_frame = zed_capture_rgb(zed); + use_zed_camera = true; + } +#endif // ZED_STEREO + + cv::VideoCapture cap; + if (filename == "web_camera") { + cap.open(0); + video_fps = cap.get(CV_CAP_PROP_FPS); + cap >> cur_frame; + } else if (!use_zed_camera) { + cap.open(filename); + video_fps = cap.get(CV_CAP_PROP_FPS); + cap >> cur_frame; + } cv::Size const frame_size = cur_frame.size(); + //cv::Size const frame_size(cap.get(CV_CAP_PROP_FRAME_WIDTH), cap.get(CV_CAP_PROP_FRAME_HEIGHT)); + std::cout << "\n Video size: " << frame_size << std::endl; + cv::VideoWriter output_video; if (save_output_videofile) output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true); - while (!cur_frame.empty()) + struct detection_data_t { + cv::Mat cap_frame; + std::shared_ptr det_image; + std::vector result_vec; + cv::Mat draw_frame; + bool new_detection; + uint64_t frame_id; + bool exit_flag; + cv::Mat zed_cloud; + std::queue track_optflow_queue; + detection_data_t() : exit_flag(false), new_detection(false) {} + }; + + const bool sync = detection_sync; // sync data exchange + send_one_replaceable_object_t cap2prepare(sync), cap2draw(sync), + prepare2detect(sync), detect2draw(sync), draw2show(sync), draw2write(sync), draw2net(sync); + + std::thread t_cap, t_prepare, t_detect, t_post, t_draw, t_write, t_network; + + // capture new video-frame + if (t_cap.joinable()) t_cap.join(); + t_cap = std::thread([&]() { - // always sync - if (t_cap.joinable()) { - t_cap.join(); - ++fps_cap_counter; - cur_frame = cap_frame.clone(); - } - t_cap = std::thread([&]() { cap >> cap_frame; }); - ++cur_time_extrapolate; - - // swap result bouned-boxes and input-frame - if(consumed) - { - std::unique_lock lock(mtx); - det_image = detector.mat_to_image_resize(cur_frame); - auto old_result_vec = detector.tracking_id(result_vec); - auto detected_result_vec = thread_result_vec; - result_vec = detected_result_vec; -#ifdef TRACK_OPTFLOW - // track optical flow - if (track_optflow_queue.size() > 0) { - //std::cout << "\n !!!! all = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl; - cv::Mat first_frame = track_optflow_queue.front(); - tracker_flow.update_tracking_flow(track_optflow_queue.front(), result_vec); - - while (track_optflow_queue.size() > 1) { - track_optflow_queue.pop(); - result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), true); - } - track_optflow_queue.pop(); - passed_flow_frames = 0; - - result_vec = detector.tracking_id(result_vec); - auto tmp_result_vec = detector.tracking_id(detected_result_vec, false); - small_preview.set(first_frame, tmp_result_vec); - - extrapolate_coords.new_result(tmp_result_vec, old_time_extrapolate); - old_time_extrapolate = cur_time_extrapolate; - extrapolate_coords.update_result(result_vec, cur_time_extrapolate - 1); + uint64_t frame_id = 0; + detection_data_t detection_data; + do { + detection_data = detection_data_t(); +#ifdef ZED_STEREO + if (use_zed_camera) { + while (zed.grab() != sl::SUCCESS) std::this_thread::sleep_for(std::chrono::milliseconds(2)); + detection_data.cap_frame = zed_capture_rgb(zed); + detection_data.zed_cloud = zed_capture_3d(zed); } -#else - result_vec = detector.tracking_id(result_vec); // comment it - if track_id is not required - extrapolate_coords.new_result(result_vec, cur_time_extrapolate - 1); -#endif - // add old tracked objects - for (auto &i : old_result_vec) { - auto it = std::find_if(result_vec.begin(), result_vec.end(), - [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); - bool track_id_absent = (it == result_vec.end()); - if (track_id_absent) { - if (i.frames_counter-- > 1) - result_vec.push_back(i); + else +#endif // ZED_STEREO + { + cap >> detection_data.cap_frame; + } + fps_cap_counter++; + detection_data.frame_id = frame_id++; + if (detection_data.cap_frame.empty() || exit_flag) { + std::cout << " exit_flag: detection_data.cap_frame.size = " << detection_data.cap_frame.size() << std::endl; + detection_data.exit_flag = true; + detection_data.cap_frame = cv::Mat(frame_size, CV_8UC3); + } + + if (!detection_sync) { + cap2draw.send(detection_data); // skip detection + } + cap2prepare.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_cap exit \n"; + }); + + + // pre-processing video frame (resize, convertion) + t_prepare = std::thread([&]() + { + std::shared_ptr det_image; + detection_data_t detection_data; + do { + detection_data = cap2prepare.receive(); + + det_image = detector.mat_to_image_resize(detection_data.cap_frame); + detection_data.det_image = det_image; + prepare2detect.send(detection_data); // detection + + } while (!detection_data.exit_flag); + std::cout << " t_prepare exit \n"; + }); + + + // detection by Yolo + if (t_detect.joinable()) t_detect.join(); + t_detect = std::thread([&]() + { + std::shared_ptr det_image; + detection_data_t detection_data; + do { + detection_data = prepare2detect.receive(); + det_image = detection_data.det_image; + std::vector result_vec; + + if(det_image) + result_vec = detector.detect_resized(*det_image, frame_size.width, frame_size.height, thresh, true); // true + fps_det_counter++; + //std::this_thread::sleep_for(std::chrono::milliseconds(150)); + + detection_data.new_detection = true; + detection_data.result_vec = result_vec; + detect2draw.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_detect exit \n"; + }); + + // draw rectangles (and track objects) + t_draw = std::thread([&]() + { + std::queue track_optflow_queue; + detection_data_t detection_data; + do { + + // for Video-file + if (detection_sync) { + detection_data = detect2draw.receive(); + } + // for Video-camera + else + { + // get new Detection result if present + if (detect2draw.is_object_present()) { + cv::Mat old_cap_frame = detection_data.cap_frame; // use old captured frame + detection_data = detect2draw.receive(); + if (!old_cap_frame.empty()) detection_data.cap_frame = old_cap_frame; + } + // get new Captured frame + else { + std::vector old_result_vec = detection_data.result_vec; // use old detections + detection_data = cap2draw.receive(); + detection_data.result_vec = old_result_vec; + } + } + + cv::Mat cap_frame = detection_data.cap_frame; + cv::Mat draw_frame = detection_data.cap_frame.clone(); + std::vector result_vec = detection_data.result_vec; + +#ifdef TRACK_OPTFLOW + if (detection_data.new_detection) { + tracker_flow.update_tracking_flow(detection_data.cap_frame, detection_data.result_vec); + while (track_optflow_queue.size() > 0) { + draw_frame = track_optflow_queue.back(); + result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), false); + track_optflow_queue.pop(); + } + } + else { + track_optflow_queue.push(cap_frame); + result_vec = tracker_flow.tracking_flow(cap_frame, false); + } + detection_data.new_detection = true; // to correct kalman filter +#endif //TRACK_OPTFLOW + + // track ID by using kalman filter + if (use_kalman_filter) { + if (detection_data.new_detection) { + result_vec = track_kalman.correct(result_vec); } else { - it->frames_counter = std::min((unsigned)3, i.frames_counter + 1); + result_vec = track_kalman.predict(); } } -#ifdef TRACK_OPTFLOW - tracker_flow.update_cur_bbox_vec(result_vec); - result_vec = tracker_flow.tracking_flow(cur_frame, true); // track optical flow -#endif - consumed = false; - cv_pre_tracked.notify_all(); - } - // launch thread once - Detection - if (!t_detect.joinable()) { - t_detect = std::thread([&]() { - auto current_image = det_image; - consumed = true; - while (current_image.use_count() > 0 && !exit_flag) { - auto result = detector.detect_resized(*current_image, frame_size.width, frame_size.height, - thresh, false); // true - ++fps_det_counter; - std::unique_lock lock(mtx); - thread_result_vec = result; - consumed = true; - cv_detected.notify_all(); - if (detector.wait_stream) { - while (consumed && !exit_flag) cv_pre_tracked.wait(lock); - } - current_image = det_image; - } - }); - } - //while (!consumed); // sync detection - - if (!cur_frame.empty()) { - steady_end = std::chrono::steady_clock::now(); - if (std::chrono::duration(steady_end - steady_start).count() >= 1) { - current_det_fps = fps_det_counter; - current_cap_fps = fps_cap_counter; - steady_start = steady_end; - fps_det_counter = 0; - fps_cap_counter = 0; + // track ID by using custom function + else { + int frame_story = std::max(5, current_fps_cap.load()); + result_vec = detector.tracking_id(result_vec, true, frame_story, 40); } - large_preview.set(cur_frame, result_vec); -#ifdef TRACK_OPTFLOW - ++passed_flow_frames; - track_optflow_queue.push(cur_frame.clone()); - result_vec = tracker_flow.tracking_flow(cur_frame); // track optical flow - extrapolate_coords.update_result(result_vec, cur_time_extrapolate); - small_preview.draw(cur_frame, show_small_boxes); -#endif - auto result_vec_draw = result_vec; - if (extrapolate_flag) { - result_vec_draw = extrapolate_coords.predict(cur_time_extrapolate); - cv::putText(cur_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); + if (use_zed_camera && !detection_data.zed_cloud.empty()) { + result_vec = get_3d_coordinates(result_vec, detection_data.zed_cloud); } - draw_boxes(cur_frame, result_vec_draw, obj_names, current_det_fps, current_cap_fps); - //show_console_result(result_vec, obj_names); - large_preview.draw(cur_frame); - cv::imshow("window name", cur_frame); - int key = cv::waitKey(3); // 3 or 16ms - if (key == 'f') show_small_boxes = !show_small_boxes; - if (key == 'p') while (true) if(cv::waitKey(100) == 'p') break; - if (key == 'e') extrapolate_flag = !extrapolate_flag; - if (key == 27) { exit_flag = true; break; } + //small_preview.set(draw_frame, result_vec); + //large_preview.set(draw_frame, result_vec); + draw_boxes(draw_frame, result_vec, obj_names, current_fps_det, current_fps_cap); + //show_console_result(result_vec, obj_names, detection_data.frame_id); + //large_preview.draw(draw_frame); + //small_preview.draw(draw_frame, true); - if (output_video.isOpened() && videowrite_ready) { - if (t_videowrite.joinable()) t_videowrite.join(); - write_frame = cur_frame.clone(); - videowrite_ready = false; - t_videowrite = std::thread([&]() { - output_video << write_frame; videowrite_ready = true; - }); - } + detection_data.result_vec = result_vec; + detection_data.draw_frame = draw_frame; + draw2show.send(detection_data); + if (send_network) draw2net.send(detection_data); + if (output_video.isOpened()) draw2write.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_draw exit \n"; + }); + + + // write frame to videofile + t_write = std::thread([&]() + { + if (output_video.isOpened()) { + detection_data_t detection_data; + cv::Mat output_frame; + do { + detection_data = draw2write.receive(); + if(detection_data.draw_frame.channels() == 4) cv::cvtColor(detection_data.draw_frame, output_frame, CV_RGBA2RGB); + else output_frame = detection_data.draw_frame; + output_video << output_frame; + } while (!detection_data.exit_flag); + output_video.release(); + } + std::cout << " t_write exit \n"; + }); + + // send detection to the network + t_network = std::thread([&]() + { + if (send_network) { + detection_data_t detection_data; + do { + detection_data = draw2net.receive(); + + detector.send_json_http(detection_data.result_vec, obj_names, detection_data.frame_id, filename); + + } while (!detection_data.exit_flag); + } + std::cout << " t_network exit \n"; + }); + + + // show detection + detection_data_t detection_data; + do { + + steady_end = std::chrono::steady_clock::now(); + float time_sec = std::chrono::duration(steady_end - steady_start).count(); + if (time_sec >= 1) { + current_fps_det = fps_det_counter.load() / time_sec; + current_fps_cap = fps_cap_counter.load() / time_sec; + steady_start = steady_end; + fps_det_counter = 0; + fps_cap_counter = 0; } -#ifndef TRACK_OPTFLOW - // wait detection result for video-file only (not for net-cam) - if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") { - std::unique_lock lock(mtx); - while (!consumed) cv_detected.wait(lock); - } -#endif - } - exit_flag = true; + detection_data = draw2show.receive(); + cv::Mat draw_frame = detection_data.draw_frame; + + //if (extrapolate_flag) { + // cv::putText(draw_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); + //} + + cv::imshow("window name", draw_frame); + int key = cv::waitKey(3); // 3 or 16ms + if (key == 'f') show_small_boxes = !show_small_boxes; + if (key == 'p') while (true) if (cv::waitKey(100) == 'p') break; + //if (key == 'e') extrapolate_flag = !extrapolate_flag; + if (key == 27) { exit_flag = true;} + + //std::cout << " current_fps_det = " << current_fps_det << ", current_fps_cap = " << current_fps_cap << std::endl; + } while (!detection_data.exit_flag); + std::cout << " show detection exit \n"; + + cv::destroyWindow("window name"); + // wait for all threads if (t_cap.joinable()) t_cap.join(); + if (t_prepare.joinable()) t_prepare.join(); if (t_detect.joinable()) t_detect.join(); - if (t_videowrite.joinable()) t_videowrite.join(); - std::cout << "Video ended \n"; + if (t_post.joinable()) t_post.join(); + if (t_draw.joinable()) t_draw.join(); + if (t_write.joinable()) t_write.join(); + if (t_network.joinable()) t_network.join(); + break; + } else if (file_ext == "txt") { // list of image files std::ifstream file(filename); @@ -470,14 +643,14 @@ int main(int argc, char *argv[]) show_console_result(result_vec, obj_names); cv::waitKey(0); } -#else +#else // OPENCV //std::vector result_vec = detector.detect(filename); auto img = detector.load_image(filename); std::vector result_vec = detector.detect(img); detector.free_image(img); show_console_result(result_vec, obj_names); -#endif +#endif // OPENCV } catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); } catch (...) { std::cerr << "unknown exception \n"; getchar(); } diff --git a/src/yolo_layer.c b/src/yolo_layer.c index a9309d1d..d303b5aa 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -2,7 +2,7 @@ #include "activations.h" #include "blas.h" #include "box.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 1b07b43b..a7418c16 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -250,8 +250,6 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use #endif //std::cout << "net.gpu_index = " << net.gpu_index << std::endl; - //float nms = .4; - image im; im.c = img.c; im.data = img.data; @@ -305,6 +303,9 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use bbox.obj_id = obj_id; bbox.prob = prob; bbox.track_id = 0; + bbox.x_3d = NAN; + bbox.y_3d = NAN; + bbox.z_3d = NAN; bbox_vec.push_back(bbox); } @@ -379,3 +380,70 @@ LIB_API std::vector Detector::tracking_id(std::vector cur_bbox_v return cur_bbox_vec; } + + +LIB_API bool Detector::send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, std::string filename, int timeout, int port) +{ + //int timeout = 400000; + //int port = 8070; + //send_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, demo_json_port, timeout); + + std::string send_str; + + char *tmp_buf = (char *)calloc(1024, sizeof(char)); + if (!filename.empty()) { + sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); + } + else { + sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"objects\": [ \n", frame_id); + } + send_str = tmp_buf; + free(tmp_buf); + + for (auto & i : cur_bbox_vec) { + char *buf = (char *)calloc(2048, sizeof(char)); + + sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"absolute_coordinates\":{\"center_x\":%d, \"center_y\":%d, \"width\":%d, \"height\":%d}, \"confidence\":%f", + i.obj_id, obj_names[i.obj_id], i.x, i.y, i.w, i.h, i.prob); + + //sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"relative_coordinates\":{\"center_x\":%f, \"center_y\":%f, \"width\":%f, \"height\":%f}, \"confidence\":%f", + // i.obj_id, obj_names[i.obj_id], i.x, i.y, i.w, i.h, i.prob); + + send_str += buf; + + if (!isnan(i.z_3d)) { + sprintf(buf, "\n , \"coordinates_in_meters\":{\"x_3d\":%.2f, \"y_3d\":%.2f, \"z_3d\":%.2f}", + i.x_3d*100, i.y_3d, i.z_3d); + send_str += buf; + } + + send_str += "}\n"; + + free(buf); + } + + //send_str += "\n ] \n}, \n"; + send_str += "\n ] \n}"; + + send_json_custom(send_str.c_str(), port, timeout); + return true; +} + +void *Detector::get_cuda_context() +{ +#ifdef GPU + int old_gpu_index; + cudaGetDevice(&old_gpu_index); + if (cur_gpu_id != old_gpu_index) + cudaSetDevice(cur_gpu_id); + + void *cuda_context = cuda_get_context(); + + if (cur_gpu_id != old_gpu_index) + cudaSetDevice(old_gpu_index); + + return cuda_context; +#else // GPU + return NULL; +#endif // GPU +} \ No newline at end of file