diff --git a/src/detector.c b/src/detector.c
index 7d504122..5f2cec83 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -399,7 +399,7 @@ void validate_detector_recall(char *datacfg, char *cfgfile, char *weightfile)
     int m = plist->size;
     int i=0;
 
-	float thresh = .2;// .001;
+	float thresh = .001;// .001;	// .2;
     float iou_thresh = .5;
     float nms = .4;
 
diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp
index 25497ce2..ebafe110 100644
--- a/src/yolo_console_dll.cpp
+++ b/src/yolo_console_dll.cpp
@@ -2,6 +2,7 @@
 #include <iomanip> 
 #include <string>
 #include <vector>
+#include <queue>
 #include <fstream>
 #include <thread>
 #include <atomic>
@@ -10,8 +11,10 @@
 
 #ifdef _WIN32
 #define OPENCV
+#include "windows.h"
 #endif
 
+#define TRACK_OPTFLOW
 #include "yolo_v2_class.hpp"	// imported functions from DLL
 
 #ifdef OPENCV
@@ -21,6 +24,11 @@
 #include "opencv2/videoio/videoio.hpp"
 #define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)""CVAUX_STR(CV_VERSION_MINOR)""CVAUX_STR(CV_VERSION_REVISION)
 #pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib")
+#pragma comment(lib, "opencv_cudaoptflow" OPENCV_VERSION ".lib")
+#pragma comment(lib, "opencv_cudaimgproc" OPENCV_VERSION ".lib")
+#pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib")
+#pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib")
+#pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib")
 #else
 #define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)""CVAUX_STR(CV_VERSION_MAJOR)""CVAUX_STR(CV_VERSION_MINOR)
 #pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib")
@@ -85,11 +93,15 @@ int main(int argc, char *argv[])
 	std::string filename;
 	if (argc > 1) filename = argv[1];
 
-	Detector detector("cfg/yolo-voc.cfg", "yolo-voc.weights");
+	//Detector detector("cfg/yolo-voc.cfg", "yolo-voc.weights");
+	Detector detector("tiny-yolo-voc_air.cfg", "backup/tiny-yolo-voc_air_5000.weights");
 
 	auto obj_names = objects_names_from_file("data/voc.names");
 	std::string out_videofile = "result.avi";
 	bool const save_output_videofile = false;
+#ifdef TRACK_OPTFLOW
+	Tracker_optflow tracker_flow;
+#endif
 
 	while (true) 
 	{		
@@ -105,6 +117,8 @@ int main(int argc, char *argv[])
 				protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/")	// video network stream
 			{
 				cv::Mat cap_frame, cur_frame, det_frame, write_frame;
+				std::queue<cv::Mat> track_optflow_queue;
+				int passed_flow_frames = 0;
 				std::shared_ptr<image_t> det_image;
 				std::vector<bbox_t> result_vec, thread_result_vec;
 				detector.nms = 0.02;	// comment it - if track_id is not required
@@ -126,7 +140,9 @@ int main(int argc, char *argv[])
 				if (save_output_videofile)
 					output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true);
 
-				while (!cur_frame.empty()) {
+				while (!cur_frame.empty()) 
+				{
+					// always sync
 					if (t_cap.joinable()) {
 						t_cap.join();
 						++fps_cap_counter;
@@ -134,22 +150,79 @@ int main(int argc, char *argv[])
 					}
 					t_cap = std::thread([&]() { cap >> cap_frame; });
 
-					// swap result and input-frame
+					// swap result bounding-boxes and input-frame
 					if(consumed)
 					{
-						std::unique_lock<std::mutex> lock(mtx);
-						det_image = detector.mat_to_image_resize(cur_frame);
-						result_vec = thread_result_vec;
-						result_vec = detector.tracking(result_vec);	// comment it - if track_id is not required
-						consumed = false;
+						{
+							std::unique_lock<std::mutex> lock(mtx);
+							det_image = detector.mat_to_image_resize(cur_frame);
+							result_vec = thread_result_vec;
+							result_vec = detector.tracking(result_vec);	// comment it - if track_id is not required
+
+							consumed = false;
+						}
+
+#ifdef TRACK_OPTFLOW
+						int y = 0, x = 0;
+						cv::Mat show_flow = cur_frame.clone();
+						// debug helper: pastes a 100x100 zoom of the 31x31 patch centred on the first box into the next grid cell (x, y) of draw_frame
+						auto lambda = [&x, &y](cv::Mat draw_frame, cv::Mat src_frame, std::vector<bbox_t> result_vec) {
+							if (result_vec.size() == 0) return;
+							bbox_t i = result_vec[0];
+							cv::Rect r(i.x + (i.w-31)/2, i.y + (i.h - 31)/2, 31, 31);
+							cv::Rect img_rect(cv::Point2i(0, 0), src_frame.size());
+							cv::Rect rect_roi = r & img_rect;
+							if (rect_roi.width < 1 || rect_roi.height < 1) return;
+							cv::Mat roi = src_frame(rect_roi);
+							cv::Mat dst;
+							cv::resize(roi, dst, cv::Size(100, 100));
+							if (x > 10) x = 0, ++y;
+							cv::Rect dst_rect_roi(cv::Point2i(x*100, y*100), dst.size());
+							// skip (but still count) a cell that does not fit: taking an ROI outside draw_frame throws
+							if ((dst_rect_roi & cv::Rect(cv::Point2i(0, 0), draw_frame.size())) != dst_rect_roi) { ++x; return; }
+							cv::Mat dst_roi = draw_frame(dst_rect_roi);
+							dst.copyTo(dst_roi);
+							++x;
+						};
+
+
+						// track optical flow
+						if (track_optflow_queue.size() > 0) {
+							std::queue<cv::Mat> new_track_optflow_queue;
+							std::cout << "\n !!!! all = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl;
+							//draw_boxes(track_optflow_queue.front().clone(), result_vec, obj_names, 3, current_det_fps, current_cap_fps);
+							//cv::waitKey(10);
+							tracker_flow.update_tracking_flow(track_optflow_queue.front());
+							lambda(show_flow, track_optflow_queue.front(), result_vec);
+							track_optflow_queue.pop();
+							while(track_optflow_queue.size() > 0) {
+								//draw_boxes(track_optflow_queue.front().clone(), result_vec, obj_names, 3, current_det_fps, current_cap_fps);
+								//cv::waitKey(10);
+								result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), result_vec);
+								if (track_optflow_queue.size() <= passed_flow_frames && new_track_optflow_queue.size() == 0)
+									new_track_optflow_queue = track_optflow_queue;
+								lambda(show_flow, track_optflow_queue.front(), result_vec);
+								track_optflow_queue.pop();
+							}					
+							track_optflow_queue = new_track_optflow_queue;
+							new_track_optflow_queue.swap(std::queue<cv::Mat>());
+							passed_flow_frames = 0;
+							std::cout << "\n !!!! now = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl;
+
+							cv::imshow("flow", show_flow);
+							cv::waitKey(3);
+						}
+#endif
+
 					}
-					// launch thread once
+					// launch thread once - Detection
 					if (!t_detect.joinable()) {
 						t_detect = std::thread([&]() {
 							auto current_image = det_image;
 							consumed = true;
 							while (current_image.use_count() > 0) {
-								auto result = detector.detect_resized(*current_image, frame_size, 0.24, true);
+								auto result = detector.detect_resized(*current_image, frame_size, 0.24, false);	// true
+								Sleep(500);
 								++fps_det_counter;
 								std::unique_lock<std::mutex> lock(mtx);
 								thread_result_vec = result;
@@ -169,6 +242,13 @@ int main(int argc, char *argv[])
 							fps_det_counter = 0;
 							fps_cap_counter = 0;
 						}
+
+#ifdef TRACK_OPTFLOW
+						++passed_flow_frames;
+						track_optflow_queue.push(cur_frame.clone());
+						result_vec = tracker_flow.tracking_flow(cur_frame, result_vec);	// track optical flow
+#endif
+
 						draw_boxes(cur_frame, result_vec, obj_names, 3, current_det_fps, current_cap_fps);
 						//show_console_result(result_vec, obj_names);
 
@@ -183,10 +263,10 @@ int main(int argc, char *argv[])
 					}
 
 					// wait detection result for video-file only (not for net-cam)
-					if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") {
-						std::unique_lock<std::mutex> lock(mtx);
-						while (!consumed) cv.wait(lock);
-					}
+					//if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") {
+					//	std::unique_lock<std::mutex> lock(mtx);
+					//	while (!consumed) cv.wait(lock);
+					//}
 				}
 				if (t_cap.joinable()) t_cap.join();
 				if (t_detect.joinable()) t_detect.join();
diff --git a/src/yolo_v2_class.hpp b/src/yolo_v2_class.hpp
index a52d3ac4..ab446ce9 100644
--- a/src/yolo_v2_class.hpp
+++ b/src/yolo_v2_class.hpp
@@ -8,6 +8,11 @@
 #include <opencv2/opencv.hpp>			// C++
 #include "opencv2/highgui/highgui_c.h"	// C
 #include "opencv2/imgproc/imgproc_c.h"	// C
+
+#include <opencv2/cudaoptflow.hpp>
+#include <opencv2/cudaimgproc.hpp>
+#include <opencv2/cudaarithm.hpp>
+#include "opencv2/core/cuda.hpp"
 #endif	// OPENCV
 
 #ifdef YOLODLL_EXPORTS
@@ -41,6 +46,7 @@ struct image_t {
 
 class Detector {
 	std::shared_ptr<void> detector_gpu_ptr;
+	std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
 public:
 	float nms = .4;
 
@@ -93,6 +99,7 @@ public:
 	}
 
 private:
+
 	static image_t ipl_to_image(IplImage* src)
 	{
 		unsigned char *data = (unsigned char *)src->imageData;
@@ -142,8 +149,165 @@ private:
 
 #endif	// OPENCV
 
-	std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
 };
 
 
+#if defined(TRACK_OPTFLOW) && defined(OPENCV)
+
+// Tracks bounding boxes between detector runs with GPU sparse pyramidal Lucas-Kanade optical flow.
+// Call update_tracking_flow() with the frame the boxes belong to, then tracking_flow() for every
+// following frame; each tracking_flow() call also makes its frame the new reference frame.
+class Tracker_optflow {
+public:
+	// GPU buffers are kept as members just to avoid extra allocations between calls
+	cv::cuda::GpuMat src_mat_gpu;
+	cv::cuda::GpuMat dst_mat_gpu, dst_grey_gpu;
+	cv::cuda::GpuMat tmp_grey_gpu;
+	cv::cuda::GpuMat prev_pts_flow_gpu, cur_pts_flow_gpu;
+	cv::cuda::GpuMat status_gpu, err_gpu;
+
+	cv::cuda::GpuMat src_grey_gpu;	// grey reference (previous) frame, used in both functions
+	cv::Ptr<cv::cuda::SparsePyrLKOpticalFlow> sync_PyrLKOpticalFlow_gpu;
+
+	// Sets the reference frame for the next tracking_flow() call; creates the LK solver on first use.
+	// src_mat: 3-channel BGR frame (any other channel count leaves the reference frame unchanged).
+	// gpu_id:  CUDA device to run on; the current device is kept if gpu_id is not available.
+	void update_tracking_flow(cv::Mat src_mat, int gpu_id = 0)
+	{
+		int const old_gpu_id = cv::cuda::getDevice();
+		static const int gpu_count = cv::cuda::getCudaEnabledDeviceCount();
+		if (gpu_count > gpu_id)
+			cv::cuda::setDevice(gpu_id);
+
+		cv::cuda::Stream stream;
+
+		if (sync_PyrLKOpticalFlow_gpu.empty()) {
+			sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create();
+			sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21));
+			sync_PyrLKOpticalFlow_gpu->setMaxLevel(3);	// def: 3
+			sync_PyrLKOpticalFlow_gpu->setNumIters(6000);	// def: 30
+		}
+
+		if (src_mat.channels() == 3) {
+			if (src_mat_gpu.cols == 0) {
+				src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type());
+				src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1);
+			}
+
+			src_mat_gpu.upload(src_mat, stream);
+			cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream);
+
+			// tracking_flow() reads src_grey_gpu on its own stream, so the conversion
+			// has to be finished before this function returns
+			stream.waitForCompletion();
+		}
+		cv::cuda::setDevice(old_gpu_id);
+	}
+
+	// Shifts every box in cur_bbox_vec by the optical flow measured at its centre between the
+	// reference frame and dst_mat, then makes dst_mat the new reference frame.
+	// dst_mat:      frame to track into; it is converted with CV_BGR2GRAY.
+	// cur_bbox_vec: boxes located on the reference frame.
+	// gpu_id:       CUDA device to run on; the current device is kept if gpu_id is not available.
+	// Returns the shifted boxes; a box whose centre moved 100 px or more along an axis is dropped.
+	// Returns cur_bbox_vec unchanged when update_tracking_flow() was never called, or when the
+	// reference frame size differs from dst_mat (dst_mat still becomes the reference frame).
+	std::vector<bbox_t> tracking_flow(cv::Mat dst_mat, std::vector<bbox_t> cur_bbox_vec, int gpu_id = 0)
+	{
+		if (sync_PyrLKOpticalFlow_gpu.empty()) {
+			std::cout << "sync_PyrLKOpticalFlow_gpu isn't initialized \n";
+			return cur_bbox_vec;
+		}
+
+		int const old_gpu_id = cv::cuda::getDevice();
+		static const int gpu_count = cv::cuda::getCudaEnabledDeviceCount();
+		if (gpu_count > gpu_id)
+			cv::cuda::setDevice(gpu_id);
+
+		cv::cuda::Stream stream;
+
+		if (dst_mat_gpu.cols == 0) {
+			dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
+			dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
+			tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
+		}
+
+		dst_mat_gpu.upload(dst_mat, stream);
+		cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream);
+
+		if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
+			stream.waitForCompletion();
+			src_grey_gpu = dst_grey_gpu.clone();
+			cv::cuda::setDevice(old_gpu_id);
+			return cur_bbox_vec;
+		}
+
+		// nothing to track: only advance the reference frame instead of pushing empty
+		// point matrices through upload / calc / download
+		if (cur_bbox_vec.empty()) {
+			dst_grey_gpu.copyTo(src_grey_gpu, stream);
+			stream.waitForCompletion();
+			cv::cuda::setDevice(old_gpu_id);
+			return cur_bbox_vec;
+		}
+
+		cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu;
+
+		// one point per box (its centre); push_back builds a column, calc() is given a row
+		for (auto &i : cur_bbox_vec) {
+			float x_center = (i.x + i.w / 2);
+			float y_center = (i.y + i.h / 2);
+			prev_pts.push_back(cv::Point2f(x_center, y_center));
+		}
+		cv::transpose(prev_pts, prev_pts_flow_cpu);
+
+		if (prev_pts_flow_gpu.cols < prev_pts_flow_cpu.cols) {
+			prev_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type());
+			cur_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type());
+			status_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_8UC1);
+			err_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_32FC1);
+		}
+
+		prev_pts_flow_gpu.upload(cv::Mat(prev_pts_flow_cpu), stream);
+
+		// keep a copy of the current grey frame: it becomes the reference frame after calc()
+		dst_grey_gpu.copyTo(tmp_grey_gpu, stream);
+
+		sync_PyrLKOpticalFlow_gpu->calc(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, err_gpu, stream);	// OpenCV 3.x
+		cur_pts_flow_gpu.download(cur_pts_flow_cpu, stream);
+		tmp_grey_gpu.copyTo(src_grey_gpu, stream);
+
+		cv::Mat err_cpu, status_cpu;
+		err_gpu.download(err_cpu, stream);
+		status_gpu.download(status_cpu, stream);
+		stream.waitForCompletion();
+
+		std::vector<bbox_t> result_bbox_vec;
+		for (size_t i = 0; i < cur_bbox_vec.size(); ++i)
+		{
+			// check the bounds before the point matrices are read
+			int const col = static_cast<int>(i);
+			if (cur_pts_flow_cpu.cols <= col || err_cpu.cols <= col || status_cpu.cols <= col) continue;
+
+			cv::Point2f const cur_key_pt = cur_pts_flow_cpu.at<cv::Point2f>(0, col);
+			cv::Point2f const prev_key_pt = prev_pts_flow_cpu.at<cv::Point2f>(0, col);
+			float const moved_x = cur_key_pt.x - prev_key_pt.x;
+			float const moved_y = cur_key_pt.y - prev_key_pt.y;
+
+			// explicit range test (no int-overload abs()); NaN displacements fail it and are dropped
+			if (moved_x > -100 && moved_x < 100 && moved_y > -100 && moved_y < 100) {
+				cur_bbox_vec[i].x += moved_x + 0.5;
+				cur_bbox_vec[i].y += moved_y + 0.5;
+				result_bbox_vec.push_back(cur_bbox_vec[i]);
+			}
+		}
+		cv::cuda::setDevice(old_gpu_id);
+		return result_bbox_vec;
+	}
+};
+#else
+
+class Tracker_optflow {};	// empty placeholder used when TRACK_OPTFLOW or OPENCV is not defined
+
+#endif	// defined(TRACK_OPTFLOW) && defined(OPENCV)