diff --git a/src/.editorconfig b/src/.editorconfig new file mode 100644 index 00000000..2eb162b2 --- /dev/null +++ b/src/.editorconfig @@ -0,0 +1,8 @@ +root=true + +[*] +trim_trailing_whitespace = true +indent_style = space +indent_size = 4 + + diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c index d35d9d2e..3fa129db 100644 --- a/src/batchnorm_layer.c +++ b/src/batchnorm_layer.c @@ -53,10 +53,10 @@ layer make_batchnorm_layer(int batch, int w, int h, int c) layer.x_gpu = cuda_make_array(layer.output, layer.batch*layer.outputs); layer.x_norm_gpu = cuda_make_array(layer.output, layer.batch*layer.outputs); #ifdef CUDNN - cudnnCreateTensorDescriptor(&layer.normTensorDesc); - cudnnCreateTensorDescriptor(&layer.normDstTensorDesc); - cudnnSetTensor4dDescriptor(layer.normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, layer.batch, layer.out_c, layer.out_h, layer.out_w); - cudnnSetTensor4dDescriptor(layer.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, layer.out_c, 1, 1); + cudnnCreateTensorDescriptor(&layer.normTensorDesc); + cudnnCreateTensorDescriptor(&layer.normDstTensorDesc); + cudnnSetTensor4dDescriptor(layer.normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, layer.batch, layer.out_c, layer.out_h, layer.out_w); + cudnnSetTensor4dDescriptor(layer.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, layer.out_c, 1, 1); #endif #endif return layer; @@ -179,93 +179,93 @@ void push_batchnorm_layer(layer l) void forward_batchnorm_layer_gpu(layer l, network_state state) { - if (l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); - if (state.train) { + if (l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); + copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + if (state.train) { #ifdef CUDNN - float one = 1; - float zero = 0; - cudnnBatchNormalizationForwardTraining(cudnn_handle(), - CUDNN_BATCHNORM_SPATIAL, - &one, - &zero, - l.normDstTensorDesc, - l.x_gpu, // input - l.normDstTensorDesc, - l.output_gpu, // output - l.normTensorDesc, - l.scales_gpu, - l.biases_gpu, - .01, - l.rolling_mean_gpu, // output (should be FP32) - l.rolling_variance_gpu, // output (should be FP32) - .00001, - l.mean_gpu, // output (should be FP32) - l.variance_gpu); // output (should be FP32) + float one = 1; + float zero = 0; + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l.normDstTensorDesc, + l.x_gpu, // input + l.normDstTensorDesc, + l.output_gpu, // output + l.normTensorDesc, + l.scales_gpu, + l.biases_gpu, + .01, + l.rolling_mean_gpu, // output (should be FP32) + l.rolling_variance_gpu, // output (should be FP32) + .00001, + l.mean_gpu, // output (should be FP32) + l.variance_gpu); // output (should be FP32) #else - fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); - fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); + fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); + fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); - scal_ongpu(l.out_c, .99, l.rolling_mean_gpu, 1); - axpy_ongpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); - scal_ongpu(l.out_c, .99, l.rolling_variance_gpu, 1); - axpy_ongpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); + scal_ongpu(l.out_c, .99, l.rolling_mean_gpu, 1); + axpy_ongpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); + scal_ongpu(l.out_c, .99, l.rolling_variance_gpu, 1); + axpy_ongpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); - normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); - copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); + copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); - scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); #endif - } - else { - normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); - scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); - } + } + else { + normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + } } void backward_batchnorm_layer_gpu(layer l, network_state state) { - if (!state.train) { - l.mean_gpu = l.rolling_mean_gpu; - l.variance_gpu = l.rolling_variance_gpu; - } + if (!state.train) { + l.mean_gpu = l.rolling_mean_gpu; + l.variance_gpu = l.rolling_variance_gpu; + } #ifdef CUDNN - float one = 1; - float zero = 0; - cudnnBatchNormalizationBackward(cudnn_handle(), - CUDNN_BATCHNORM_SPATIAL, - &one, - &zero, - &one, - &one, - l.normDstTensorDesc, - l.x_gpu, // input - l.normDstTensorDesc, - l.delta_gpu, // input - l.normDstTensorDesc, - l.x_norm_gpu, // output - l.normTensorDesc, - l.scales_gpu, // output (should be FP32) - l.scale_updates_gpu, // output (should be FP32) - l.bias_updates_gpu, // output (should be FP32) - .00001, - l.mean_gpu, // input (should be FP32) - l.variance_gpu); // input (should be FP32) - copy_ongpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1); + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l.normDstTensorDesc, + l.x_gpu, // input + l.normDstTensorDesc, + l.delta_gpu, // input + l.normDstTensorDesc, + l.x_norm_gpu, // output + l.normTensorDesc, + l.scales_gpu, // output (should be FP32) + l.scale_updates_gpu, // output (should be FP32) + l.bias_updates_gpu, // output (should be FP32) + .00001, + l.mean_gpu, // input (should be FP32) + l.variance_gpu); // input (should be FP32) + copy_ongpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1); #else - backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); - backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); - scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); - fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); - normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); + fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); + fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); + normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); #endif - if (l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); + if (l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); } #endif \ No newline at end of file diff --git a/src/blas.c b/src/blas.c index 4ff0b834..ccf05223 100644 --- a/src/blas.c +++ b/src/blas.c @@ -11,8 +11,8 @@ void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride, int b,i,j,k; int in_c = out_c/(stride*stride); - //printf("\n out_c = %d, out_w = %d, out_h = %d, stride = %d, forward = %d \n", out_c, out_w, out_h, stride, forward); - //printf(" in_c = %d, in_w = %d, in_h = %d \n", in_c, out_w*stride, out_h*stride); + //printf("\n out_c = %d, out_w = %d, out_h = %d, stride = %d, forward = %d \n", out_c, out_w, out_h, stride, forward); + //printf(" in_c = %d, in_w = %d, in_h = %d \n", in_c, out_w*stride, out_h*stride); for(b = 0; b < batch; ++b){ for(k = 0; k < out_c; ++k){ @@ -24,7 +24,7 @@ void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride, int w2 = i*stride + offset % stride; int h2 = j*stride + offset / stride; int out_index = w2 + out_w*stride*(h2 + out_h*stride*(c2 + in_c*b)); - if(forward) out[out_index] = x[in_index]; // used by default for forward (i.e. forward = 0) + if(forward) out[out_index] = x[in_index]; // used by default for forward (i.e. forward = 0) else out[in_index] = x[out_index]; } } @@ -293,17 +293,17 @@ void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, i void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) { - int i, j, k, b; - for (b = 0; b < batch; ++b) { - for (k = 0; k < c; ++k) { - for (j = 0; j < h*stride; ++j) { - for (i = 0; i < w*stride; ++i) { - int in_index = b*w*h*c + k*w*h + (j / stride)*w + i / stride; - int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; - if (forward) out[out_index] = scale*in[in_index]; - else in[in_index] += scale*out[out_index]; - } - } - } - } + int i, j, k, b; + for (b = 0; b < batch; ++b) { + for (k = 0; k < c; ++k) { + for (j = 0; j < h*stride; ++j) { + for (i = 0; i < w*stride; ++i) { + int in_index = b*w*h*c + k*w*h + (j / stride)*w + i / stride; + int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; + if (forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } } \ No newline at end of file diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 1edbbbde..34c0008b 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -157,16 +157,16 @@ extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2 extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t) { - scal_ongpu(n, B1, m, 1); - scal_ongpu(n, B2, v, 1); - axpy_ongpu(n, -decay*batch, w, 1, d, 1); + scal_ongpu(n, B1, m, 1); + scal_ongpu(n, B2, v, 1); + axpy_ongpu(n, -decay*batch, w, 1, d, 1); - axpy_ongpu(n, (1 - B1), d, 1, m, 1); - mul_ongpu(n, d, 1, d, 1); - axpy_ongpu(n, (1 - B2), d, 1, v, 1); + axpy_ongpu(n, (1 - B1), d, 1, m, 1); + mul_ongpu(n, d, 1, d, 1); + axpy_ongpu(n, (1 - B2), d, 1, v, 1); - adam_gpu(n, w, m, v, B1, B2, rate, eps, t); - fill_ongpu(n, 0, d, 1); + adam_gpu(n, w, m, v, B1, B2, rate, eps, t); + fill_ongpu(n, 0, d, 1); } __global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) @@ -237,7 +237,7 @@ __global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, local[id] += (i+id < spatial) ? delta[index] : 0; } } - __syncthreads(); + __syncthreads(); if(id == 0){ mean_delta[filter] = 0; @@ -266,7 +266,7 @@ __global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, local[id] += (i+id < spatial) ? delta[index]*(x[index] - mean[filter]) : 0; } } - __syncthreads(); + __syncthreads(); if(id == 0){ variance_delta[filter] = 0; @@ -462,7 +462,7 @@ __global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, local[id] += (i+id < spatial) ? x[index] : 0; } } - __syncthreads(); + __syncthreads(); if(id == 0){ mean[filter] = 0; @@ -491,7 +491,7 @@ __global__ void fast_variance_kernel(float *x, float *mean, int batch, int filt local[id] += (i+id < spatial) ? powf((x[index] - mean[filter]), 2) : 0; } } - __syncthreads(); + __syncthreads(); if(id == 0){ variance[filter] = 0; @@ -787,31 +787,31 @@ extern "C" void softmax_gpu(float *input, int n, int offset, int groups, float t __global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) { - size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i >= N) return; - int out_index = i; - int out_w = i % (w*stride); - i = i / (w*stride); - int out_h = i % (h*stride); - i = i / (h*stride); - int out_c = i%c; - i = i / c; - int b = i%batch; + size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= N) return; + int out_index = i; + int out_w = i % (w*stride); + i = i / (w*stride); + int out_h = i % (h*stride); + i = i / (h*stride); + int out_c = i%c; + i = i / c; + int b = i%batch; - int in_w = out_w / stride; - int in_h = out_h / stride; - int in_c = out_c; + int in_w = out_w / stride; + int in_h = out_h / stride; + int in_c = out_c; - int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; + int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; - if (forward) out[out_index] += scale * x[in_index]; - else atomicAdd(x + in_index, scale * out[out_index]); + if (forward) out[out_index] += scale * x[in_index]; + else atomicAdd(x + in_index, scale * out[out_index]); } extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) { - size_t size = w*h*c*batch*stride*stride; - upsample_kernel << > >(size, in, w, h, c, batch, stride, forward, scale, out); - check_error(cudaPeekAtLastError()); + size_t size = w*h*c*batch*stride*stride; + upsample_kernel << > >(size, in, w, h, c, batch, stride, forward, scale, out); + check_error(cudaPeekAtLastError()); } \ No newline at end of file diff --git a/src/box.c b/src/box.c index a2c676f2..718215fe 100644 --- a/src/box.c +++ b/src/box.c @@ -278,88 +278,88 @@ void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thr int nms_comparator_v3(const void *pa, const void *pb) { - detection a = *(detection *)pa; - detection b = *(detection *)pb; - float diff = 0; - if (b.sort_class >= 0) { - diff = a.prob[b.sort_class] - b.prob[b.sort_class]; - } - else { - diff = a.objectness - b.objectness; - } - if (diff < 0) return 1; - else if (diff > 0) return -1; - return 0; + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if (b.sort_class >= 0) { + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } + else { + diff = a.objectness - b.objectness; + } + if (diff < 0) return 1; + else if (diff > 0) return -1; + return 0; } void do_nms_obj(detection *dets, int total, int classes, float thresh) { - int i, j, k; - k = total - 1; - for (i = 0; i <= k; ++i) { - if (dets[i].objectness == 0) { - detection swap = dets[i]; - dets[i] = dets[k]; - dets[k] = swap; - --k; - --i; - } - } - total = k + 1; + int i, j, k; + k = total - 1; + for (i = 0; i <= k; ++i) { + if (dets[i].objectness == 0) { + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k + 1; - for (i = 0; i < total; ++i) { - dets[i].sort_class = -1; - } + for (i = 0; i < total; ++i) { + dets[i].sort_class = -1; + } - qsort(dets, total, sizeof(detection), nms_comparator_v3); - for (i = 0; i < total; ++i) { - if (dets[i].objectness == 0) continue; - box a = dets[i].bbox; - for (j = i + 1; j < total; ++j) { - if (dets[j].objectness == 0) continue; - box b = dets[j].bbox; - if (box_iou(a, b) > thresh) { - dets[j].objectness = 0; - for (k = 0; k < classes; ++k) { - dets[j].prob[k] = 0; - } - } - } - } + qsort(dets, total, sizeof(detection), nms_comparator_v3); + for (i = 0; i < total; ++i) { + if (dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for (j = i + 1; j < total; ++j) { + if (dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh) { + dets[j].objectness = 0; + for (k = 0; k < classes; ++k) { + dets[j].prob[k] = 0; + } + } + } + } } void do_nms_sort(detection *dets, int total, int classes, float thresh) { - int i, j, k; - k = total - 1; - for (i = 0; i <= k; ++i) { - if (dets[i].objectness == 0) { - detection swap = dets[i]; - dets[i] = dets[k]; - dets[k] = swap; - --k; - --i; - } - } - total = k + 1; + int i, j, k; + k = total - 1; + for (i = 0; i <= k; ++i) { + if (dets[i].objectness == 0) { + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k + 1; - for (k = 0; k < classes; ++k) { - for (i = 0; i < total; ++i) { - dets[i].sort_class = k; - } - qsort(dets, total, sizeof(detection), nms_comparator_v3); - for (i = 0; i < total; ++i) { - //printf(" k = %d, \t i = %d \n", k, i); - if (dets[i].prob[k] == 0) continue; - box a = dets[i].bbox; - for (j = i + 1; j < total; ++j) { - box b = dets[j].bbox; - if (box_iou(a, b) > thresh) { - dets[j].prob[k] = 0; - } - } - } - } + for (k = 0; k < classes; ++k) { + for (i = 0; i < total; ++i) { + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator_v3); + for (i = 0; i < total; ++i) { + //printf(" k = %d, \t i = %d \n", k, i); + if (dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for (j = i + 1; j < total; ++j) { + box b = dets[j].bbox; + if (box_iou(a, b) > thresh) { + dets[j].prob[k] = 0; + } + } + } + } } void do_nms(box *boxes, float **probs, int total, int classes, float thresh) diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 324fc508..5389828e 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -76,36 +76,36 @@ void binarize_weights_gpu(float *weights, int n, int size, float *binary) __global__ void cuda_f32_to_f16(float* input_f32, size_t size, half *output_f16) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < size) output_f16[idx] = __float2half(input_f32[idx]); - //if (idx < size) *((unsigned short *)output_f16 + idx) = __float2half(input_f32[idx]); + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) output_f16[idx] = __float2half(input_f32[idx]); + //if (idx < size) *((unsigned short *)output_f16 + idx) = __float2half(input_f32[idx]); } void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16) { - cuda_f32_to_f16 <<< size / BLOCK + 1, BLOCK, 0, get_cuda_stream() >>> (input_f32, size, (half *)output_f16); + cuda_f32_to_f16 <<< size / BLOCK + 1, BLOCK, 0, get_cuda_stream() >>> (input_f32, size, (half *)output_f16); } __global__ void cuda_f16_to_f32(half* input_f16, size_t size, float *output_f32) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < size) output_f32[idx] = __half2float(input_f16[idx]); - //if (idx < size) output_f32[idx] = __half2float(*((unsigned short *)input_f16 + idx)); + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) output_f32[idx] = __half2float(input_f16[idx]); + //if (idx < size) output_f32[idx] = __half2float(*((unsigned short *)input_f16 + idx)); } void cuda_convert_f16_to_f32(float* input_f16, size_t size, float *output_f32) { - cuda_f16_to_f32 <<< size / BLOCK + 1, BLOCK, 0, get_cuda_stream() >>> ((half *)input_f16, size, output_f32); + cuda_f16_to_f32 <<< size / BLOCK + 1, BLOCK, 0, get_cuda_stream() >>> ((half *)input_f16, size, output_f32); } half *cuda_make_f16_from_f32_array(float *src, size_t n) { - half *dst16; - size_t size = sizeof(half)*n; - check_error(cudaMalloc((void **)&dst16, size)); - if (src) { - cuda_convert_f32_to_f16(src, n, (float *)dst16); - } - if (!dst16) error("Cuda malloc failed\n"); - return dst16; + half *dst16; + size_t size = sizeof(half)*n; + check_error(cudaMalloc((void **)&dst16, size)); + if (src) { + cuda_convert_f32_to_f16(src, n, (float *)dst16); + } + if (!dst16) error("Cuda malloc failed\n"); + return dst16; } void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) @@ -124,96 +124,96 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) } #ifdef CUDNN - float one = 1; // alpha[0], beta[0] is float for HALF and FLOAT - float alpha = 1, beta = 0; + float one = 1; // alpha[0], beta[0] is float for HALF and FLOAT + float alpha = 1, beta = 0; #ifdef CUDNN_HALF - // Note: For improved performance it is advised to use beta[0] = 0.0. - // For Tensor Core: cudnnSetConvolutionMathType() where cudnnMathType_t mathType = CUDNN_TENSOR_OP_MATH; - // 1. or CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM and use CUDNN_DATA_HALF - // 2. or CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED - // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops + // Note: For improved performance it is advised to use beta[0] = 0.0. + // For Tensor Core: cudnnSetConvolutionMathType() where cudnnMathType_t mathType = CUDNN_TENSOR_OP_MATH; + // 1. or CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM and use CUDNN_DATA_HALF + // 2. or CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED + // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops - const size_t input16_size = l.batch*l.c*l.w*l.h; - const size_t output16_size = l.batch*l.out_c*l.out_h*l.out_w; + const size_t input16_size = l.batch*l.c*l.w*l.h; + const size_t output16_size = l.batch*l.out_c*l.out_h*l.out_w; - if (*state.net.max_input16_size < input16_size) { - //printf("\n input16_size: cur = %zu \t max = %zu \n", input16_size, *state.net.max_input16_size); - *state.net.max_input16_size = input16_size; - if (*state.net.input16_gpu) cuda_free(*state.net.input16_gpu); - *state.net.input16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_input16_size); - } - float *input16 = *state.net.input16_gpu; + if (*state.net.max_input16_size < input16_size) { + //printf("\n input16_size: cur = %zu \t max = %zu \n", input16_size, *state.net.max_input16_size); + *state.net.max_input16_size = input16_size; + if (*state.net.input16_gpu) cuda_free(*state.net.input16_gpu); + *state.net.input16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_input16_size); + } + float *input16 = *state.net.input16_gpu; - if (*state.net.max_output16_size < output16_size) { - *state.net.max_output16_size = output16_size; - if (*state.net.output16_gpu) cuda_free(*state.net.output16_gpu); - *state.net.output16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_output16_size); - } - float *output16 = *state.net.output16_gpu; + if (*state.net.max_output16_size < output16_size) { + *state.net.max_output16_size = output16_size; + if (*state.net.output16_gpu) cuda_free(*state.net.output16_gpu); + *state.net.output16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_output16_size); + } + float *output16 = *state.net.output16_gpu; - cuda_convert_f32_to_f16(state.input, input16_size, input16); + cuda_convert_f32_to_f16(state.input, input16_size, input16); - //fill_ongpu(output16_size / 2, 0, (float *)output16, 1); - cudnnConvolutionForward(cudnn_handle(), - &alpha, - l.srcTensorDesc, - input16, - l.weightDesc, - l.weights_gpu16, - l.convDesc, - l.fw_algo, - state.workspace, - l.workspace_size, - &beta, - l.dstTensorDesc, - output16); - + //fill_ongpu(output16_size / 2, 0, (float *)output16, 1); + cudnnConvolutionForward(cudnn_handle(), + &alpha, + l.srcTensorDesc, + input16, + l.weightDesc, + l.weights_gpu16, + l.convDesc, + l.fw_algo, + state.workspace, + l.workspace_size, + &beta, + l.dstTensorDesc, + output16); + - if (l.batch_normalize) - { - if (state.train) // Training - { - copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1); - //cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream()); - float one = 1; - float zero = 0; - // Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth - // compared to FP32, it’s just that the statistics and value adjustment should be done in FP32. - cudnnBatchNormalizationForwardTraining(cudnn_handle(), - CUDNN_BATCHNORM_SPATIAL, - &one, - &zero, - l.normDstTensorDescF16, - l.x_gpu, // input - l.normDstTensorDescF16, - output16, // output - l.normTensorDesc, - l.scales_gpu, - l.biases_gpu, - .01, - l.rolling_mean_gpu, // output (should be FP32) - l.rolling_variance_gpu, // output (should be FP32) - .00001, - l.mean_gpu, // output (should be FP32) - l.variance_gpu); // output (should be FP32) + if (l.batch_normalize) + { + if (state.train) // Training + { + copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1); + //cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream()); + float one = 1; + float zero = 0; + // Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth + // compared to FP32, it’s just that the statistics and value adjustment should be done in FP32. + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l.normDstTensorDescF16, + l.x_gpu, // input + l.normDstTensorDescF16, + output16, // output + l.normTensorDesc, + l.scales_gpu, + l.biases_gpu, + .01, + l.rolling_mean_gpu, // output (should be FP32) + l.rolling_variance_gpu, // output (should be FP32) + .00001, + l.mean_gpu, // output (should be FP32) + l.variance_gpu); // output (should be FP32) - cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); - //forward_batchnorm_layer_gpu(l, state); - } - else // Detection - { - cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); - normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); - scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); - } - } - else // BIAS only - { - cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); - } + cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); + //forward_batchnorm_layer_gpu(l, state); + } + else // Detection + { + cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); + normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + } + } + else // BIAS only + { + cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } #else @@ -230,7 +230,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) &one, l.dstTensorDesc, l.output_gpu); -#endif // CUDNN_HALF +#endif // CUDNN_HALF #else @@ -250,16 +250,16 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) #ifndef CUDNN_HALF if (l.batch_normalize) { forward_batchnorm_layer_gpu(l, state); - } - else { - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); - } + } + else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } #endif // no CUDNN_HALF activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); //if(l.dot > 0) dot_error_gpu(l); if(l.binary || l.xnor) swap_binary(&l); - //cudaDeviceSynchronize(); // for correct profiling of performance + //cudaDeviceSynchronize(); // for correct profiling of performance } void backward_convolutional_layer_gpu(convolutional_layer l, network_state state) @@ -272,126 +272,126 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state if(l.batch_normalize){ backward_batchnorm_layer_gpu(l, state); } else { - //backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + //backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); } #endif // no CUDNN_HALF float *original_input = state.input; if(l.xnor) state.input = l.binary_input_gpu; #ifdef CUDNN - float one = 1; - float alpha = 1, beta = 0; + float one = 1; + float alpha = 1, beta = 0; #ifdef CUDNN_HALF - - const size_t input16_size = l.batch*l.c*l.w*l.h; - const size_t delta16_size = l.batch*l.n*l.out_w*l.out_h; - - if (*state.net.max_input16_size < input16_size) { - *state.net.max_input16_size = input16_size; - if(*state.net.input16_gpu) cuda_free(*state.net.input16_gpu); - *state.net.input16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_input16_size); - } - float *input16 = *state.net.input16_gpu; + + const size_t input16_size = l.batch*l.c*l.w*l.h; + const size_t delta16_size = l.batch*l.n*l.out_w*l.out_h; + + if (*state.net.max_input16_size < input16_size) { + *state.net.max_input16_size = input16_size; + if(*state.net.input16_gpu) cuda_free(*state.net.input16_gpu); + *state.net.input16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_input16_size); + } + float *input16 = *state.net.input16_gpu; - if (*state.net.max_output16_size < delta16_size) { - *state.net.max_output16_size = delta16_size; - if(*state.net.output16_gpu) cuda_free(*state.net.output16_gpu); - *state.net.output16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_output16_size); - } - float *delta16 = *state.net.output16_gpu; + if (*state.net.max_output16_size < delta16_size) { + *state.net.max_output16_size = delta16_size; + if(*state.net.output16_gpu) cuda_free(*state.net.output16_gpu); + *state.net.output16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_output16_size); + } + float *delta16 = *state.net.output16_gpu; - cuda_convert_f32_to_f16(state.input, input16_size, input16); - cuda_convert_f32_to_f16(l.delta_gpu, delta16_size, delta16); + cuda_convert_f32_to_f16(state.input, input16_size, input16); + cuda_convert_f32_to_f16(l.delta_gpu, delta16_size, delta16); - if (l.batch_normalize) { - //if (!state.train) { - // l.mean_gpu = l.rolling_mean_gpu; - // l.variance_gpu = l.rolling_variance_gpu; - //} - float one = 1; - float zero = 0; - cudnnBatchNormalizationBackward(cudnn_handle(), - CUDNN_BATCHNORM_SPATIAL, - &one, - &zero, - &one, - &one, - l.normDstTensorDescF16, - l.x_gpu, // input - l.normDstTensorDescF16, - delta16, // input - l.normDstTensorDescF16, - l.x_norm_gpu, // output - l.normTensorDesc, - l.scales_gpu, // output (should be FP32) - l.scale_updates_gpu, // output (should be FP32) - l.bias_updates_gpu, // output (should be FP32) - .00001, - l.mean_gpu, // input (should be FP32) - l.variance_gpu); // input (should be FP32) - copy_ongpu(l.outputs*l.batch / 2, l.x_norm_gpu, 1, delta16, 1); - //cudaMemcpyAsync(delta16, l.x_norm_gpu, l.outputs*l.batch * sizeof(half), cudaMemcpyDefault, get_cuda_stream()); - } - else - { - //backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); - } + if (l.batch_normalize) { + //if (!state.train) { + // l.mean_gpu = l.rolling_mean_gpu; + // l.variance_gpu = l.rolling_variance_gpu; + //} + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l.normDstTensorDescF16, + l.x_gpu, // input + l.normDstTensorDescF16, + delta16, // input + l.normDstTensorDescF16, + l.x_norm_gpu, // output + l.normTensorDesc, + l.scales_gpu, // output (should be FP32) + l.scale_updates_gpu, // output (should be FP32) + l.bias_updates_gpu, // output (should be FP32) + .00001, + l.mean_gpu, // input (should be FP32) + l.variance_gpu); // input (should be FP32) + copy_ongpu(l.outputs*l.batch / 2, l.x_norm_gpu, 1, delta16, 1); + //cudaMemcpyAsync(delta16, l.x_norm_gpu, l.outputs*l.batch * sizeof(half), cudaMemcpyDefault, get_cuda_stream()); + } + else + { + //backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } - // convert input: state.input (x), l.delta_gpu (y) from fp32 to fp16 - // get output: l.weight_updates_gpu (dw) and convert it to fp32 (ONLY if it is fp16) + // convert input: state.input (x), l.delta_gpu (y) from fp32 to fp16 + // get output: l.weight_updates_gpu (dw) and convert it to fp32 (ONLY if it is fp16) - // calculate conv weight updates - // Already: l.weight_updates_gpu = (l.weight_updates_gpu - l.weight*decay*batch*subdivision)*momentum - // so we should copy f32 to f16, or compute: f16=(w_up - w*d*b*s)*m - cuda_convert_f32_to_f16(l.weight_updates_gpu, l.c*l.n*l.size*l.size, l.weight_updates_gpu16); + // calculate conv weight updates + // Already: l.weight_updates_gpu = (l.weight_updates_gpu - l.weight*decay*batch*subdivision)*momentum + // so we should copy f32 to f16, or compute: f16=(w_up - w*d*b*s)*m + cuda_convert_f32_to_f16(l.weight_updates_gpu, l.c*l.n*l.size*l.size, l.weight_updates_gpu16); - cudnnConvolutionBackwardFilter(cudnn_handle(), - &one, - l.srcTensorDesc, - input16, //state.input, - l.ddstTensorDesc, - delta16, //l.delta_gpu, - l.convDesc, - l.bf_algo, - state.workspace, - l.workspace_size, - &one, - l.dweightDesc, - l.weight_updates_gpu16); // l.weight_updates_gpu); + cudnnConvolutionBackwardFilter(cudnn_handle(), + &one, + l.srcTensorDesc, + input16, //state.input, + l.ddstTensorDesc, + delta16, //l.delta_gpu, + l.convDesc, + l.bf_algo, + state.workspace, + l.workspace_size, + &one, + l.dweightDesc, + l.weight_updates_gpu16); // l.weight_updates_gpu); - cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.c*l.n*l.size*l.size, l.weight_updates_gpu); + cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.c*l.n*l.size*l.size, l.weight_updates_gpu); - if (state.delta) { - if (l.binary || l.xnor) swap_binary(&l); + if (state.delta) { + if (l.binary || l.xnor) swap_binary(&l); - // http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData - // calculate delta for the next layer - // convert input: l.weights_gpu (w), l.delta_gpu (dy) from fp32 to fp16 - // get output: state.delta (dx) and convert it to fp32 (ONLY if it is fp16) - cudnnConvolutionBackwardData(cudnn_handle(), - &alpha, - l.weightDesc, - l.weights_gpu16, //l.weights_gpu, - l.ddstTensorDesc, - delta16, //l.delta_gpu, - l.convDesc, - l.bd_algo, - state.workspace, - l.workspace_size, - &beta, - l.dsrcTensorDesc, - input16); // state.delta); + // http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData + // calculate delta for the next layer + // convert input: l.weights_gpu (w), l.delta_gpu (dy) from fp32 to fp16 + // get output: state.delta (dx) and convert it to fp32 (ONLY if it is fp16) + cudnnConvolutionBackwardData(cudnn_handle(), + &alpha, + l.weightDesc, + l.weights_gpu16, //l.weights_gpu, + l.ddstTensorDesc, + delta16, //l.delta_gpu, + l.convDesc, + l.bd_algo, + state.workspace, + l.workspace_size, + &beta, + l.dsrcTensorDesc, + input16); // state.delta); - cuda_convert_f16_to_f32(input16, input16_size, state.delta); + cuda_convert_f16_to_f32(input16, input16_size, state.delta); - if (l.binary || l.xnor) swap_binary(&l); - if (l.xnor) gradient_array_ongpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, state.delta); - } -#else // CUDNN_HALF + if (l.binary || l.xnor) swap_binary(&l); + if (l.xnor) gradient_array_ongpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, state.delta); + } +#else // CUDNN_HALF - // calculate conv weight updates - // if used: beta=1 then loss decreases faster + // calculate conv weight updates + // if used: beta=1 then loss decreases faster cudnnConvolutionBackwardFilter(cudnn_handle(), &one, l.srcTensorDesc, @@ -408,8 +408,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state if(state.delta){ if(l.binary || l.xnor) swap_binary(&l); - // http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData - // calculate delta for the next layer + // http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData + // calculate delta for the next layer cudnnConvolutionBackwardData(cudnn_handle(), &one, l.weightDesc, @@ -427,9 +427,9 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state if(l.xnor) gradient_array_ongpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, state.delta); } -#endif // CUDNN_HALF +#endif // CUDNN_HALF -#else // CUDNN +#else // CUDNN int m = l.n; int n = l.size*l.size*l.c; int k = l.out_w*l.out_h; @@ -482,7 +482,7 @@ void push_convolutional_layer(convolutional_layer layer) { cuda_push_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size); #ifdef CUDNN_HALF - cuda_convert_f32_to_f16(layer.weights_gpu, layer.c*layer.n*layer.size*layer.size, layer.weights_gpu16); + cuda_convert_f32_to_f16(layer.weights_gpu, layer.c*layer.n*layer.size*layer.size, layer.weights_gpu16); #endif cuda_push_array(layer.biases_gpu, layer.biases, layer.n); cuda_push_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size); @@ -522,14 +522,14 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float adam_gpu(size, layer.weights_gpu, layer.m_gpu, layer.v_gpu, layer.B1, layer.B2, learning_rate/batch, layer.eps, layer.t+1); fill_ongpu(size, 0, layer.weight_updates_gpu, 1); }else{ - // update weights: - // weights_gpu = weights_gpu*(1 - decay*lr) + weight_updates_gpu*lr / (batch*subdivision) = - // weights_gpu*(1 - 0.0005*0.001) + weight_updates_gpu*0.001/(64*8) = - // weights_gpu * 0.999 999 5 + weight_updates_gpu * 0.000 001 953125 - // - // weight_updates_gpu = (weight_updates_gpu - weights_gpu*decay*batch*subdivision)*momentum = - // (weight_updates_gpu - weights_gpu * 0.0005 * 64 * 8) * 0.9 = - // weight_updates_gpu*0.9 - weights_gpu*0.2304 + // update weights: + // weights_gpu = weights_gpu*(1 - decay*lr) + weight_updates_gpu*lr / (batch*subdivision) = + // weights_gpu*(1 - 0.0005*0.001) + weight_updates_gpu*0.001/(64*8) = + // weights_gpu * 0.999 999 5 + weight_updates_gpu * 0.000 001 953125 + // + // weight_updates_gpu = (weight_updates_gpu - weights_gpu*decay*batch*subdivision)*momentum = + // (weight_updates_gpu - weights_gpu * 0.0005 * 64 * 8) * 0.9 = + // weight_updates_gpu*0.9 - weights_gpu*0.2304 axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); scal_ongpu(size, momentum, layer.weight_updates_gpu, 1); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 90f9551b..554bd421 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -141,67 +141,67 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) { #ifdef CUDNN_HALF - // TRUE_HALF_CONFIG is only supported on architectures with true fp16 support (compute capability 5.3 and 6.0): - // Tegra X1, Jetson TX1, DRIVE CX, DRIVE PX, Quadro GP100, Tesla P100 - // PSEUDO_HALF_CONFIG is required for Tensor Cores - our case! - const cudnnDataType_t data_type = CUDNN_DATA_HALF; + // TRUE_HALF_CONFIG is only supported on architectures with true fp16 support (compute capability 5.3 and 6.0): + // Tegra X1, Jetson TX1, DRIVE CX, DRIVE PX, Quadro GP100, Tesla P100 + // PSEUDO_HALF_CONFIG is required for Tensor Cores - our case! + const cudnnDataType_t data_type = CUDNN_DATA_HALF; #else - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; + cudnnDataType_t data_type = CUDNN_DATA_FLOAT; #endif #if(CUDNN_MAJOR >= 7) - // Tensor Core uses CUDNN_TENSOR_OP_MATH instead of CUDNN_DEFAULT_MATH - // For *_ALGO_WINOGRAD_NONFUSED can be used CUDNN_DATA_FLOAT - // otherwise Input, Filter and Output descriptors (xDesc, yDesc, wDesc, dxDesc, dyDesc and dwDesc as applicable) have dataType = CUDNN_DATA_HALF - // Three techniques for training using Mixed-precision: https://devblogs.nvidia.com/mixed-precision-training-deep-neural-networks/ - // 1. Accumulation into FP32 - // 2. Loss Scaling - required only for: activation gradients. We do not use. - // 3. FP32 Master Copy of Weights - // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops - cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH); + // Tensor Core uses CUDNN_TENSOR_OP_MATH instead of CUDNN_DEFAULT_MATH + // For *_ALGO_WINOGRAD_NONFUSED can be used CUDNN_DATA_FLOAT + // otherwise Input, Filter and Output descriptors (xDesc, yDesc, wDesc, dxDesc, dyDesc and dwDesc as applicable) have dataType = CUDNN_DATA_HALF + // Three techniques for training using Mixed-precision: https://devblogs.nvidia.com/mixed-precision-training-deep-neural-networks/ + // 1. Accumulation into FP32 + // 2. Loss Scaling - required only for: activation gradients. We do not use. + // 3. FP32 Master Copy of Weights + // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops + cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH); #endif - // INT8_CONFIG, INT8_EXT_CONFIG, INT8x4_CONFIG and INT8x4_EXT_CONFIG are only supported - // on architectures with DP4A support (compute capability 6.1 and later). - //cudnnDataType_t data_type = CUDNN_DATA_INT8; + // INT8_CONFIG, INT8_EXT_CONFIG, INT8x4_CONFIG and INT8x4_EXT_CONFIG are only supported + // on architectures with DP4A support (compute capability 6.1 and later). + //cudnnDataType_t data_type = CUDNN_DATA_INT8; - // backward delta + // backward delta cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->c, l->h, l->w); cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->out_c, l->out_h, l->out_w); cudnnSetFilter4dDescriptor(l->dweightDesc, data_type, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size); - // forward + // forward cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->c, l->h, l->w); cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->out_c, l->out_h, l->out_w); cudnnSetFilter4dDescriptor(l->weightDesc, data_type, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size); - // batch norm - cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); - cudnnSetTensor4dDescriptor(l->normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + // batch norm + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + cudnnSetTensor4dDescriptor(l->normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); - cudnnSetTensor4dDescriptor(l->normDstTensorDescF16, CUDNN_TENSOR_NCHW, data_type, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normDstTensorDescF16, CUDNN_TENSOR_NCHW, data_type, l->batch, l->out_c, l->out_h, l->out_w); #if(CUDNN_MAJOR >= 6) - cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); // cudnn >= 6.0 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); // cudnn >= 6.0 #else - cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1 #endif - int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST; - int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST; - int backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST; - if (cudnn_preference == cudnn_smallest) - { - forward_algo = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; - backward_algo = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; - backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE; - printf(" CUDNN-slow "); - } + int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST; + int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST; + int backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST; + if (cudnn_preference == cudnn_smallest) + { + forward_algo = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; + backward_algo = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; + backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE; + printf(" CUDNN-slow "); + } - cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), + cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), l->srcTensorDesc, l->weightDesc, l->convDesc, l->dstTensorDesc, - forward_algo, + forward_algo, 0, &l->fw_algo); cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), @@ -209,7 +209,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) l->ddstTensorDesc, l->convDesc, l->dsrcTensorDesc, - backward_algo, + backward_algo, 0, &l->bd_algo); cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), @@ -217,41 +217,41 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) l->ddstTensorDesc, l->convDesc, l->dweightDesc, - backward_filter, + backward_filter, 0, &l->bf_algo); - if (data_type == CUDNN_DATA_HALF) - { - // HALF-16 if(data_type == CUDNN_DATA_HALF) - l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; - l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1; - l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1; + if (data_type == CUDNN_DATA_HALF) + { + // HALF-16 if(data_type == CUDNN_DATA_HALF) + l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; + l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1; + l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1; - // FLOAT-32 if(data_type == CUDNN_DATA_FLOAT) - //l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED; - //l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED; - //l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED; + // FLOAT-32 if(data_type == CUDNN_DATA_FLOAT) + //l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED; + //l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED; + //l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED; - int fw = 0, bd = 0, bf = 0; - if (l->fw_algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM) fw = 1; - //printf("Tensor Cores - Forward enabled: l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM \n"); - if (l->fw_algo == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED) fw = 2; - //printf("Tensor Cores - Forward enabled: l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED \n"); + int fw = 0, bd = 0, bf = 0; + if (l->fw_algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM) fw = 1; + //printf("Tensor Cores - Forward enabled: l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM \n"); + if (l->fw_algo == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED) fw = 2; + //printf("Tensor Cores - Forward enabled: l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED \n"); - if (l->bd_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_1) bd = 1; - //printf("Tensor Cores - Backward-data enabled: l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 \n"); - if (l->bd_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED) bd = 2; - //printf("Tensor Cores - Backward-data enabled: l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED \n"); + if (l->bd_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_1) bd = 1; + //printf("Tensor Cores - Backward-data enabled: l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 \n"); + if (l->bd_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED) bd = 2; + //printf("Tensor Cores - Backward-data enabled: l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED \n"); - if (l->bf_algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1) bf = 1; - //printf("Tensor Cores - Backward-filter enabled: l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 \n"); - if (l->bf_algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED) bf = 2; - //printf("Tensor Cores - Backward-filter enabled: l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED \n"); + if (l->bf_algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1) bf = 1; + //printf("Tensor Cores - Backward-filter enabled: l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 \n"); + if (l->bf_algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED) bf = 2; + //printf("Tensor Cores - Backward-filter enabled: l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED \n"); - if (fw == 2 && bd == 2 && bf == 2) printf("TF "); - else if (fw == 1 && bd == 1 && bf == 1) printf("TH "); - } + if (fw == 2 && bd == 2 && bf == 2) printf("TF "); + else if (fw == 1 && bd == 1 && bf == 1) printf("TH "); + } } #endif #endif @@ -344,8 +344,8 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); #ifdef CUDNN_HALF - l.weights_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weights, c*n*size*size / 2); - l.weight_updates_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weight_updates, c*n*size*size / 2); + l.weights_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weights, c*n*size*size / 2); + l.weight_updates_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weight_updates, c*n*size*size / 2); #endif l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); @@ -379,10 +379,10 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); } -#ifdef CUDNN - cudnnCreateTensorDescriptor(&l.normDstTensorDesc); - cudnnCreateTensorDescriptor(&l.normDstTensorDescF16); - cudnnCreateTensorDescriptor(&l.normTensorDesc); +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normDstTensorDesc); + cudnnCreateTensorDescriptor(&l.normDstTensorDescF16); + cudnnCreateTensorDescriptor(&l.normTensorDesc); cudnnCreateTensorDescriptor(&l.srcTensorDesc); cudnnCreateTensorDescriptor(&l.dstTensorDesc); cudnnCreateFilterDescriptor(&l.weightDesc); @@ -398,8 +398,8 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int l.activation = activation; //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); - l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; - fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); return l; } @@ -445,8 +445,8 @@ void test_convolutional_layer() void resize_convolutional_layer(convolutional_layer *l, int w, int h) { - int old_w = l->w; - int old_h = l->h; + int old_w = l->w; + int old_h = l->h; l->w = w; l->h = h; int out_w = convolutional_out_width(*l); @@ -465,31 +465,31 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); } - if (l->xnor) { - //l->binary_input = realloc(l->inputs*l->batch, sizeof(float)); - } + if (l->xnor) { + //l->binary_input = realloc(l->inputs*l->batch, sizeof(float)); + } #ifdef GPU - if (old_w < w || old_h < h) { - cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); + if (old_w < w || old_h < h) { + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); - l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); - l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); - if (l->batch_normalize) { - cuda_free(l->x_gpu); - cuda_free(l->x_norm_gpu); + if (l->batch_normalize) { + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); - l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); - l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); - } + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } - if (l->xnor) { - cuda_free(l->binary_input_gpu); - l->binary_input_gpu = cuda_make_array(0, l->inputs*l->batch); - } - } + if (l->xnor) { + cuda_free(l->binary_input_gpu); + l->binary_input_gpu = cuda_make_array(0, l->inputs*l->batch); + } + } #ifdef CUDNN cudnn_convolutional_setup(l, cudnn_fastest); #endif @@ -497,15 +497,15 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) l->workspace_size = get_workspace_size(*l); #ifdef CUDNN - // check for excessive memory consumption - size_t free_byte; - size_t total_byte; - check_error(cudaMemGetInfo(&free_byte, &total_byte)); - if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { - printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2); - cudnn_convolutional_setup(l, cudnn_smallest); - l->workspace_size = get_workspace_size(*l); - } + // check for excessive memory consumption + size_t free_byte; + size_t total_byte; + check_error(cudaMemGetInfo(&free_byte, &total_byte)); + if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { + printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2); + cudnn_convolutional_setup(l, cudnn_smallest); + l->workspace_size = get_workspace_size(*l); + } #endif } diff --git a/src/cuda.c b/src/cuda.c index 53009067..688e1c58 100644 --- a/src/cuda.c +++ b/src/cuda.c @@ -61,25 +61,25 @@ dim3 cuda_gridsize(size_t n){ return d; } -static cudaStream_t streamsArray[16]; // cudaStreamSynchronize( get_cuda_stream() ); +static cudaStream_t streamsArray[16]; // cudaStreamSynchronize( get_cuda_stream() ); static int streamInit[16] = { 0 }; cudaStream_t get_cuda_stream() { - int i = cuda_get_device(); - if (!streamInit[i]) { - cudaError_t status = cudaStreamCreate(&streamsArray[i]); - //cudaError_t status = cudaStreamCreateWithFlags(&streamsArray[i], cudaStreamNonBlocking); - if (status != cudaSuccess) { - printf(" cudaStreamCreate error: %d \n", status); - const char *s = cudaGetErrorString(status); - char buffer[256]; - printf("CUDA Error: %s\n", s); - status = cudaStreamCreateWithFlags(&streamsArray[i], cudaStreamDefault); - check_error(status); - } - streamInit[i] = 1; - } - return streamsArray[i]; + int i = cuda_get_device(); + if (!streamInit[i]) { + cudaError_t status = cudaStreamCreate(&streamsArray[i]); + //cudaError_t status = cudaStreamCreateWithFlags(&streamsArray[i], cudaStreamNonBlocking); + if (status != cudaSuccess) { + printf(" cudaStreamCreate error: %d \n", status); + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error: %s\n", s); + status = cudaStreamCreateWithFlags(&streamsArray[i], cudaStreamDefault); + check_error(status); + } + streamInit[i] = 1; + } + return streamsArray[i]; } @@ -92,7 +92,7 @@ cudnnHandle_t cudnn_handle() if(!init[i]) { cudnnCreate(&handle[i]); init[i] = 1; - cudnnStatus_t status = cudnnSetStream(handle[i], get_cuda_stream()); + cudnnStatus_t status = cudnnSetStream(handle[i], get_cuda_stream()); } return handle[i]; } @@ -105,7 +105,7 @@ cublasHandle_t blas_handle() int i = cuda_get_device(); if(!init[i]) { cublasCreate(&handle[i]); - cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream()); + cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream()); init[i] = 1; } return handle[i]; @@ -119,7 +119,7 @@ float *cuda_make_array(float *x, size_t n) check_error(status); if(x){ //status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); - status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, get_cuda_stream()); + status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, get_cuda_stream()); check_error(status); } if(!x_gpu) error("Cuda malloc failed\n"); @@ -164,7 +164,7 @@ int *cuda_make_int_array(size_t n) void cuda_free(float *x_gpu) { - //cudaStreamSynchronize(get_cuda_stream()); + //cudaStreamSynchronize(get_cuda_stream()); cudaError_t status = cudaFree(x_gpu); check_error(status); } @@ -173,7 +173,7 @@ void cuda_push_array(float *x_gpu, float *x, size_t n) { size_t size = sizeof(float)*n; //cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); - cudaError_t status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, get_cuda_stream()); + cudaError_t status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, get_cuda_stream()); check_error(status); } @@ -181,9 +181,9 @@ void cuda_pull_array(float *x_gpu, float *x, size_t n) { size_t size = sizeof(float)*n; //cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); - cudaError_t status = cudaMemcpyAsync(x, x_gpu, size, cudaMemcpyDeviceToHost, get_cuda_stream()); + cudaError_t status = cudaMemcpyAsync(x, x_gpu, size, cudaMemcpyDeviceToHost, get_cuda_stream()); check_error(status); - cudaStreamSynchronize(get_cuda_stream()); + cudaStreamSynchronize(get_cuda_stream()); } #else // GPU diff --git a/src/data.c b/src/data.c index ec7d36f3..2a0a7e9f 100644 --- a/src/data.c +++ b/src/data.c @@ -44,15 +44,15 @@ char **get_random_paths(char **paths, int n, int m) char **random_paths = calloc(n, sizeof(char*)); int i; pthread_mutex_lock(&mutex); - //printf("n = %d \n", n); + //printf("n = %d \n", n); for(i = 0; i < n; ++i){ - do { - int index = random_gen() % m; - random_paths[i] = paths[index]; - //if(i == 0) printf("%s\n", paths[index]); - //printf("grp: %s\n", paths[index]); - if (strlen(random_paths[i]) <= 4) printf(" Very small path to the image: %s \n", random_paths[i]); - } while (strlen(random_paths[i]) == 0); + do { + int index = random_gen() % m; + random_paths[i] = paths[index]; + //if(i == 0) printf("%s\n", paths[index]); + //printf("grp: %s\n", paths[index]); + if (strlen(random_paths[i]) <= 4) printf(" Very small path to the image: %s \n", random_paths[i]); + } while (strlen(random_paths[i]) == 0); } pthread_mutex_unlock(&mutex); return random_paths; @@ -140,18 +140,18 @@ box_label *read_boxes(char *filename, int *n) { box_label *boxes = calloc(1, sizeof(box_label)); FILE *file = fopen(filename, "r"); - if (!file) { - printf("Can't open label file. (This can be normal only if you use MSCOCO) \n"); - //file_error(filename); - FILE* fw = fopen("bad.list", "a"); - fwrite(filename, sizeof(char), strlen(filename), fw); - char *new_line = "\n"; - fwrite(new_line, sizeof(char), strlen(new_line), fw); - fclose(fw); + if (!file) { + printf("Can't open label file. (This can be normal only if you use MSCOCO) \n"); + //file_error(filename); + FILE* fw = fopen("bad.list", "a"); + fwrite(filename, sizeof(char), strlen(filename), fw); + char *new_line = "\n"; + fwrite(new_line, sizeof(char), strlen(new_line), fw); + fclose(fw); - *n = 0; - return boxes; - } + *n = 0; + return boxes; + } float x, y, h, w; int id; int count = 0; @@ -224,7 +224,7 @@ void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) { char labelpath[4096]; - replace_image_to_label(path, labelpath); + replace_image_to_label(path, labelpath); int count = 0; box_label *boxes = read_boxes(labelpath, &count); @@ -258,9 +258,9 @@ void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) { char labelpath[4096]; - replace_image_to_label(path, labelpath); + replace_image_to_label(path, labelpath); - int count = 0; + int count = 0; box_label *boxes = read_boxes(labelpath, &count); randomize_boxes(boxes, count); correct_boxes(boxes, count, dx, dy, sx, sy, flip); @@ -299,77 +299,77 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int } void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy, - int small_object, int net_w, int net_h) + int small_object, int net_w, int net_h) { - char labelpath[4096]; - replace_image_to_label(path, labelpath); + char labelpath[4096]; + replace_image_to_label(path, labelpath); - int count = 0; - int i; - box_label *boxes = read_boxes(labelpath, &count); - float lowest_w = 1.F / net_w; - float lowest_h = 1.F / net_h; - if (small_object == 1) { - for (i = 0; i < count; ++i) { - if (boxes[i].w < lowest_w) boxes[i].w = lowest_w; - if (boxes[i].h < lowest_h) boxes[i].h = lowest_h; - } - } - randomize_boxes(boxes, count); - correct_boxes(boxes, count, dx, dy, sx, sy, flip); - if (count > num_boxes) count = num_boxes; - float x, y, w, h; - int id; + int count = 0; + int i; + box_label *boxes = read_boxes(labelpath, &count); + float lowest_w = 1.F / net_w; + float lowest_h = 1.F / net_h; + if (small_object == 1) { + for (i = 0; i < count; ++i) { + if (boxes[i].w < lowest_w) boxes[i].w = lowest_w; + if (boxes[i].h < lowest_h) boxes[i].h = lowest_h; + } + } + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + if (count > num_boxes) count = num_boxes; + float x, y, w, h; + int id; - for (i = 0; i < count; ++i) { - x = boxes[i].x; - y = boxes[i].y; - w = boxes[i].w; - h = boxes[i].h; - id = boxes[i].id; + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; - // not detect small objects - //if ((w < 0.001F || h < 0.001F)) continue; - // if truth (box for object) is smaller than 1x1 pix - char buff[256]; - if (id >= classes) { - printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, classes); - sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, classes); - system(buff); - getchar(); - continue; - } - if ((w < lowest_w || h < lowest_h)) { - //sprintf(buff, "echo %s \"Very small object: w < lowest_w OR h < lowest_h\" >> bad_label.list", labelpath); - //system(buff); - continue; - } - if (x == 999999 || y == 999999) { - printf("\n Wrong annotation: x = 0, y = 0 \n"); - sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath); - system(buff); - continue; - } - if (x <= 0 || x > 1 || y <= 0 || y > 1) { - printf("\n Wrong annotation: x = %f, y = %f \n", x, y); - sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y); - system(buff); - continue; - } - if (w > 1) { - printf("\n Wrong annotation: w = %f \n", w); - sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w); - system(buff); - w = 1; - } - if (h > 1) { - printf("\n Wrong annotation: h = %f \n", h); - sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h); - system(buff); - h = 1; - } - if (x == 0) x += lowest_w; - if (y == 0) y += lowest_h; + // not detect small objects + //if ((w < 0.001F || h < 0.001F)) continue; + // if truth (box for object) is smaller than 1x1 pix + char buff[256]; + if (id >= classes) { + printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, classes); + sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, classes); + system(buff); + getchar(); + continue; + } + if ((w < lowest_w || h < lowest_h)) { + //sprintf(buff, "echo %s \"Very small object: w < lowest_w OR h < lowest_h\" >> bad_label.list", labelpath); + //system(buff); + continue; + } + if (x == 999999 || y == 999999) { + printf("\n Wrong annotation: x = 0, y = 0 \n"); + sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath); + system(buff); + continue; + } + if (x <= 0 || x > 1 || y <= 0 || y > 1) { + printf("\n Wrong annotation: x = %f, y = %f \n", x, y); + sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y); + system(buff); + continue; + } + if (w > 1) { + printf("\n Wrong annotation: w = %f \n", w); + sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w); + system(buff); + w = 1; + } + if (h > 1) { + printf("\n Wrong annotation: h = %f \n", h); + sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h); + system(buff); + h = 1; + } + if (x == 0) x += lowest_w; + if (y == 0) y += lowest_h; truth[i*5+0] = x; truth[i*5+1] = y; @@ -524,7 +524,7 @@ matrix load_tags_paths(char **paths, int n, int k) char **get_labels_custom(char *filename, int *size) { list *plist = get_paths(filename); - if(size) *size = plist->size; + if(size) *size = plist->size; char **labels = (char **)list_to_array(plist); free_list(plist); return labels; @@ -532,7 +532,7 @@ char **get_labels_custom(char *filename, int *size) char **get_labels(char *filename) { - return get_labels_custom(filename, NULL); + return get_labels_custom(filename, NULL); } void free_data(data d) @@ -742,22 +742,22 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo d.y = make_matrix(n, 5*boxes); for(i = 0; i < n; ++i){ - const char *filename = random_paths[i]; + const char *filename = random_paths[i]; - int flag = (c >= 3); - IplImage *src; - if ((src = cvLoadImage(filename, flag)) == 0) - { - fprintf(stderr, "Cannot load image \"%s\"\n", filename); - char buff[256]; - sprintf(buff, "echo %s >> bad.list", filename); - system(buff); - continue; - //exit(0); - } + int flag = (c >= 3); + IplImage *src; + if ((src = cvLoadImage(filename, flag)) == 0) + { + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + continue; + //exit(0); + } - int oh = src->height; - int ow = src->width; + int oh = src->height; + int ow = src->width; int dw = (ow*jitter); int dh = (oh*jitter); @@ -778,81 +778,81 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo float dx = ((float)pleft/ow)/sx; float dy = ((float)ptop /oh)/sy; - float dhue = rand_uniform_strong(-hue, hue); - float dsat = rand_scale(saturation); - float dexp = rand_scale(exposure); + float dhue = rand_uniform_strong(-hue, hue); + float dsat = rand_scale(saturation); + float dexp = rand_scale(exposure); - image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp); - d.X.vals[i] = ai.data; - - //show_image(ai, "aug"); - //cvWaitKey(0); + image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp); + d.X.vals[i] = ai.data; + + //show_image(ai, "aug"); + //cvWaitKey(0); fill_truth_detection(filename, boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object, w, h); - cvReleaseImage(&src); + cvReleaseImage(&src); } free(random_paths); return d; } -#else // OPENCV +#else // OPENCV data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) { c = c ? c : 3; - char **random_paths = get_random_paths(paths, n, m); - int i; - data d = { 0 }; - d.shallow = 0; + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = { 0 }; + d.shallow = 0; - d.X.rows = n; - d.X.vals = calloc(d.X.rows, sizeof(float*)); - d.X.cols = h*w*c; + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*c; - d.y = make_matrix(n, 5 * boxes); - for (i = 0; i < n; ++i) { - image orig = load_image(random_paths[i], 0, 0, c); + d.y = make_matrix(n, 5 * boxes); + for (i = 0; i < n; ++i) { + image orig = load_image(random_paths[i], 0, 0, c); - int oh = orig.h; - int ow = orig.w; + int oh = orig.h; + int ow = orig.w; - int dw = (ow*jitter); - int dh = (oh*jitter); + int dw = (ow*jitter); + int dh = (oh*jitter); - int pleft = rand_uniform_strong(-dw, dw); - int pright = rand_uniform_strong(-dw, dw); - int ptop = rand_uniform_strong(-dh, dh); - int pbot = rand_uniform_strong(-dh, dh); + int pleft = rand_uniform_strong(-dw, dw); + int pright = rand_uniform_strong(-dw, dw); + int ptop = rand_uniform_strong(-dh, dh); + int pbot = rand_uniform_strong(-dh, dh); - int swidth = ow - pleft - pright; - int sheight = oh - ptop - pbot; + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; - float sx = (float)swidth / ow; - float sy = (float)sheight / oh; + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; - int flip = use_flip ? random_gen() % 2 : 0; - image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + int flip = use_flip ? random_gen() % 2 : 0; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); - float dx = ((float)pleft / ow) / sx; - float dy = ((float)ptop / oh) / sy; + float dx = ((float)pleft / ow) / sx; + float dy = ((float)ptop / oh) / sy; - image sized = resize_image(cropped, w, h); - if (flip) flip_image(sized); - random_distort_image(sized, hue, saturation, exposure); - d.X.vals[i] = sized.data; + image sized = resize_image(cropped, w, h); + if (flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; - fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, small_object, w, h); + fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, small_object, w, h); - free_image(orig); - free_image(cropped); - } - free(random_paths); - return d; + free_image(orig); + free_image(cropped); + } + free(random_paths); + return d; } -#endif // OPENCV +#endif // OPENCV void *load_thread(void *ptr) { - //srand(time(0)); + //srand(time(0)); //printf("Loading data: %d\n", random_gen()); load_args a = *(struct load_args*)ptr; if(a.exposure == 0) a.exposure = 1; @@ -878,9 +878,9 @@ void *load_thread(void *ptr) } else if (a.type == IMAGE_DATA){ *(a.im) = load_image(a.path, 0, 0, a.c); *(a.resized) = resize_image(*(a.im), a.w, a.h); - }else if (a.type == LETTERBOX_DATA) { - *(a.im) = load_image(a.path, 0, 0, a.c); - *(a.resized) = letterbox_image(*(a.im), a.w, a.h); + }else if (a.type == LETTERBOX_DATA) { + *(a.im) = load_image(a.path, 0, 0, a.c); + *(a.resized) = letterbox_image(*(a.im), a.w, a.h); } else if (a.type == TAG_DATA){ *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); } @@ -899,7 +899,7 @@ pthread_t load_data_in_thread(load_args args) void *load_threads(void *ptr) { - //srand(time(0)); + //srand(time(0)); int i; load_args args = *(load_args *)ptr; if (args.threads == 0) args.threads = 1; diff --git a/src/demo.c b/src/demo.c index 34627ce1..1b6b9521 100644 --- a/src/demo.c +++ b/src/demo.c @@ -64,25 +64,25 @@ static int letter_box = 0; void *fetch_in_thread(void *ptr) { //in = get_image_from_stream(cap); - int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream - if(letter_box) - in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, cpp_video_capture, dont_close_stream); - else - in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, cpp_video_capture, dont_close_stream); + int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream + if(letter_box) + in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, cpp_video_capture, dont_close_stream); + else + in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, cpp_video_capture, dont_close_stream); if(!in_s.data){ //error("Stream closed."); - printf("Stream closed.\n"); - flag_exit = 1; - return EXIT_FAILURE; + printf("Stream closed.\n"); + flag_exit = 1; + return EXIT_FAILURE; } //in_s = resize_image(in, net.w, net.h); - + return 0; } void *detect_in_thread(void *ptr) { - float nms = .45; // 0.4F + float nms = .45; // 0.4F layer l = net.layers[net.n-1]; float *X = det_s.data; @@ -94,29 +94,29 @@ void *detect_in_thread(void *ptr) free_image(det_s); - int nboxes = 0; - detection *dets = NULL; - if (letter_box) - dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box - else - dets = get_network_boxes(&net, det_s.w, det_s.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized - //if (nms) do_nms_obj(dets, nboxes, l.classes, nms); // bad results - if (nms) do_nms_sort(dets, nboxes, l.classes, nms); - + int nboxes = 0; + detection *dets = NULL; + if (letter_box) + dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box + else + dets = get_network_boxes(&net, det_s.w, det_s.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized + //if (nms) do_nms_obj(dets, nboxes, l.classes, nms); // bad results + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + printf("\033[2J"); printf("\033[1;1H"); printf("\nFPS:%.1f\n",fps); printf("Objects:\n\n"); - ipl_images[demo_index] = det_img; - det_img = ipl_images[(demo_index + FRAMES / 2 + 1) % FRAMES]; + ipl_images[demo_index] = det_img; + det_img = ipl_images[(demo_index + FRAMES / 2 + 1) % FRAMES]; demo_index = (demo_index + 1)%FRAMES; - - draw_detections_cv_v3(det_img, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes, demo_ext_output); - free_detections(dets, nboxes); - return 0; + draw_detections_cv_v3(det_img, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes, demo_ext_output); + free_detections(dets, nboxes); + + return 0; } double get_wall_time() @@ -129,7 +129,7 @@ double get_wall_time() } void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, - int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show, int ext_output) + int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show, int ext_output) { //skip = frame_skip; image **alphabet = load_alphabet(); @@ -138,40 +138,40 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int demo_alphabet = alphabet; demo_classes = classes; demo_thresh = thresh; - demo_ext_output = ext_output; + demo_ext_output = ext_output; printf("Demo\n"); - net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 + net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 if(weightfile){ load_weights(&net, weightfile); } //set_batch_network(&net, 1); - fuse_conv_batchnorm(net); + fuse_conv_batchnorm(net); srand(2222222); if(filename){ printf("video file: %s\n", filename); -//#ifdef CV_VERSION_EPOCH // OpenCV 2.x -// cap = cvCaptureFromFile(filename); -//#else // OpenCV 3.x - cpp_video_capture = 1; - cap = get_capture_video_stream(filename); +//#ifdef CV_VERSION_EPOCH // OpenCV 2.x +// cap = cvCaptureFromFile(filename); +//#else // OpenCV 3.x + cpp_video_capture = 1; + cap = get_capture_video_stream(filename); //#endif }else{ - printf("Webcam index: %d\n", cam_index); -//#ifdef CV_VERSION_EPOCH // OpenCV 2.x + printf("Webcam index: %d\n", cam_index); +//#ifdef CV_VERSION_EPOCH // OpenCV 2.x // cap = cvCaptureFromCAM(cam_index); -//#else // OpenCV 3.x - cpp_video_capture = 1; - cap = get_capture_webcam(cam_index); +//#else // OpenCV 3.x + cpp_video_capture = 1; + cap = get_capture_webcam(cam_index); //#endif } - if (!cap) { + if (!cap) { #ifdef WIN32 - printf("Check that you have copied file opencv_ffmpeg340_64.dll to the same directory where is darknet.exe \n"); + printf("Check that you have copied file opencv_ffmpeg340_64.dll to the same directory where is darknet.exe \n"); #endif - error("Couldn't connect to webcam.\n"); - } + error("Couldn't connect to webcam.\n"); + } layer l = net.layers[net.n-1]; int j; @@ -184,51 +184,51 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *)); - flag_exit = 0; + flag_exit = 0; pthread_t fetch_thread; pthread_t detect_thread; fetch_in_thread(0); - det_img = in_img; + det_img = in_img; det_s = in_s; fetch_in_thread(0); detect_in_thread(0); - det_img = in_img; + det_img = in_img; det_s = in_s; for(j = 0; j < FRAMES/2; ++j){ fetch_in_thread(0); detect_in_thread(0); - det_img = in_img; + det_img = in_img; det_s = in_s; } int count = 0; if(!prefix && !dont_show){ - cvNamedWindow("Demo", CV_WINDOW_NORMAL); + cvNamedWindow("Demo", CV_WINDOW_NORMAL); cvMoveWindow("Demo", 0, 0); cvResizeWindow("Demo", 1352, 1013); } - CvVideoWriter* output_video_writer = NULL; // cv::VideoWriter output_video; - if (out_filename && !flag_exit) - { - CvSize size; - size.width = det_img->width, size.height = det_img->height; - int src_fps = 25; - src_fps = get_stream_fps(cap, cpp_video_capture); + CvVideoWriter* output_video_writer = NULL; // cv::VideoWriter output_video; + if (out_filename && !flag_exit) + { + CvSize size; + size.width = det_img->width, size.height = det_img->height; + int src_fps = 25; + src_fps = get_stream_fps(cap, cpp_video_capture); - //const char* output_name = "test_dnn_out.avi"; - //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('H', '2', '6', '4'), src_fps, size, 1); - output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('D', 'I', 'V', 'X'), src_fps, size, 1); - //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('M', 'J', 'P', 'G'), src_fps, size, 1); - //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('M', 'P', '4', 'V'), src_fps, size, 1); - //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('M', 'P', '4', '2'), src_fps, size, 1); - //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('X', 'V', 'I', 'D'), src_fps, size, 1); - //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('W', 'M', 'V', '2'), src_fps, size, 1); - } + //const char* output_name = "test_dnn_out.avi"; + //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('H', '2', '6', '4'), src_fps, size, 1); + output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('D', 'I', 'V', 'X'), src_fps, size, 1); + //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('M', 'J', 'P', 'G'), src_fps, size, 1); + //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('M', 'P', '4', 'V'), src_fps, size, 1); + //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('M', 'P', '4', '2'), src_fps, size, 1); + //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('X', 'V', 'I', 'D'), src_fps, size, 1); + //output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('W', 'M', 'V', '2'), src_fps, size, 1); + } double before = get_wall_time(); @@ -239,66 +239,66 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); if(!prefix){ - if (!dont_show) { - show_image_cv_ipl(show_img, "Demo"); - int c = cvWaitKey(1); - if (c == 10) { - if (frame_skip == 0) frame_skip = 60; - else if (frame_skip == 4) frame_skip = 0; - else if (frame_skip == 60) frame_skip = 4; - else frame_skip = 0; - } - else if (c == 27 || c == 1048603) // ESC - exit (OpenCV 2.x / 3.x) - { - flag_exit = 1; - } - } + if (!dont_show) { + show_image_cv_ipl(show_img, "Demo"); + int c = cvWaitKey(1); + if (c == 10) { + if (frame_skip == 0) frame_skip = 60; + else if (frame_skip == 4) frame_skip = 0; + else if (frame_skip == 60) frame_skip = 4; + else frame_skip = 0; + } + else if (c == 27 || c == 1048603) // ESC - exit (OpenCV 2.x / 3.x) + { + flag_exit = 1; + } + } }else{ char buff[256]; sprintf(buff, "%s_%08d.jpg", prefix, count); - cvSaveImage(buff, show_img, 0); + cvSaveImage(buff, show_img, 0); //save_image(disp, buff); } - // if you run it with param -http_port 8090 then open URL in your web-browser: http://localhost:8090 - if (http_stream_port > 0 && show_img) { - //int port = 8090; - int port = http_stream_port; - int timeout = 200; - int jpeg_quality = 30; // 1 - 100 - send_mjpeg(show_img, port, timeout, jpeg_quality); - } + // if you run it with param -http_port 8090 then open URL in your web-browser: http://localhost:8090 + if (http_stream_port > 0 && show_img) { + //int port = 8090; + int port = http_stream_port; + int timeout = 200; + int jpeg_quality = 30; // 1 - 100 + send_mjpeg(show_img, port, timeout, jpeg_quality); + } - // save video file - if (output_video_writer && show_img) { - cvWriteFrame(output_video_writer, show_img); - printf("\n cvWriteFrame \n"); - } + // save video file + if (output_video_writer && show_img) { + cvWriteFrame(output_video_writer, show_img); + printf("\n cvWriteFrame \n"); + } - cvReleaseImage(&show_img); + cvReleaseImage(&show_img); pthread_join(fetch_thread, 0); pthread_join(detect_thread, 0); - if (flag_exit == 1) break; + if (flag_exit == 1) break; if(delay == 0){ - show_img = det_img; + show_img = det_img; } - det_img = in_img; + det_img = in_img; det_s = in_s; }else { fetch_in_thread(0); - det_img = in_img; + det_img = in_img; det_s = in_s; detect_in_thread(0); - show_img = det_img; - if (!dont_show) { - show_image_cv_ipl(show_img, "Demo"); - cvWaitKey(1); - } - cvReleaseImage(&show_img); + show_img = det_img; + if (!dont_show) { + show_image_cv_ipl(show_img, "Demo"); + cvWaitKey(1); + } + cvReleaseImage(&show_img); } --delay; if(delay < 0){ @@ -310,42 +310,42 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int before = after; } } - printf("input video stream closed. \n"); - if (output_video_writer) { - cvReleaseVideoWriter(&output_video_writer); - printf("output_video_writer closed. \n"); - } + printf("input video stream closed. \n"); + if (output_video_writer) { + cvReleaseVideoWriter(&output_video_writer); + printf("output_video_writer closed. \n"); + } - // free memory - cvReleaseImage(&show_img); - cvReleaseImage(&in_img); - free_image(in_s); + // free memory + cvReleaseImage(&show_img); + cvReleaseImage(&in_img); + free_image(in_s); - free(avg); - for (j = 0; j < FRAMES; ++j) free(predictions[j]); - for (j = 0; j < FRAMES; ++j) free_image(images[j]); + free(avg); + for (j = 0; j < FRAMES; ++j) free(predictions[j]); + for (j = 0; j < FRAMES; ++j) free_image(images[j]); - for (j = 0; j < l.w*l.h*l.n; ++j) free(probs[j]); - free(boxes); - free(probs); + for (j = 0; j < l.w*l.h*l.n; ++j) free(probs[j]); + free(boxes); + free(probs); - free_ptrs(names, net.layers[net.n - 1].classes); + free_ptrs(names, net.layers[net.n - 1].classes); - int i; - const int nsize = 8; - for (j = 0; j < nsize; ++j) { - for (i = 32; i < 127; ++i) { - free_image(alphabet[j][i]); - } - free(alphabet[j]); - } - free(alphabet); + int i; + const int nsize = 8; + for (j = 0; j < nsize; ++j) { + for (i = 32; i < 127; ++i) { + free_image(alphabet[j][i]); + } + free(alphabet[j]); + } + free(alphabet); - free_network(net); + free_network(net); } #else void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, - int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show, int ext_output) + int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show, int ext_output) { fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); } diff --git a/src/detector.c b/src/detector.c index 0a0ecdad..244b4c3d 100644 --- a/src/detector.c +++ b/src/detector.c @@ -27,7 +27,7 @@ IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size); void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches); -#endif // OPENCV +#endif // OPENCV static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; @@ -61,14 +61,14 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i srand(time(0)); network net = nets[0]; - const int actual_batch_size = net.batch * net.subdivisions; - if (actual_batch_size == 1) { - printf("\n Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64 \n"); - getchar(); - } - else if (actual_batch_size < 64) { - printf("\n Warning: You set batch=%d lower than 64! It is recommended to set batch=64 subdivision=64 \n", actual_batch_size); - } + const int actual_batch_size = net.batch * net.subdivisions; + if (actual_batch_size == 1) { + printf("\n Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64 \n"); + getchar(); + } + else if (actual_batch_size < 64) { + printf("\n Warning: You set batch=%d lower than 64! It is recommended to set batch=64 subdivision=64 \n", actual_batch_size); + } int imgs = net.batch * net.subdivisions * ngpus; printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); @@ -83,26 +83,26 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i //int N = plist->size; char **paths = (char **)list_to_array(plist); - int init_w = net.w; - int init_h = net.h; - int iter_save; - iter_save = get_current_batch(net); + int init_w = net.w; + int init_h = net.h; + int iter_save; + iter_save = get_current_batch(net); load_args args = {0}; args.w = net.w; args.h = net.h; - args.c = net.c; - args.paths = paths; + args.c = net.c; + args.paths = paths; args.n = imgs; args.m = plist->size; args.classes = classes; args.flip = net.flip; args.jitter = jitter; args.num_boxes = l.max_boxes; - args.small_object = net.small_object; + args.small_object = net.small_object; args.d = &buffer; args.type = DETECTION_DATA; - args.threads = 16; // 64 + args.threads = 16; // 64 args.angle = net.angle; args.exposure = net.exposure; @@ -110,40 +110,40 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i args.hue = net.hue; #ifdef OPENCV - args.threads = 3 * ngpus; - IplImage* img = NULL; - float max_img_loss = 5; - int number_of_lines = 100; - int img_size = 1000; - if (!dont_show) - img = draw_train_chart(max_img_loss, net.max_batches, number_of_lines, img_size); -#endif //OPENCV + args.threads = 3 * ngpus; + IplImage* img = NULL; + float max_img_loss = 5; + int number_of_lines = 100; + int img_size = 1000; + if (!dont_show) + img = draw_train_chart(max_img_loss, net.max_batches, number_of_lines, img_size); +#endif //OPENCV pthread_t load_thread = load_data(args); double time; int count = 0; //while(i*imgs < N*120){ while(get_current_batch(net) < net.max_batches){ - if(l.random && count++%10 == 0){ + if(l.random && count++%10 == 0){ printf("Resizing\n"); - //int dim = (rand() % 12 + (init_w/32 - 5)) * 32; // +-160 + //int dim = (rand() % 12 + (init_w/32 - 5)) * 32; // +-160 //int dim = (rand() % 4 + 16) * 32; - //if (get_current_batch(net)+100 > net.max_batches) dim = 544; - - //int random_val = rand() % 12; - //int dim_w = (random_val + (init_w / 32 - 5)) * 32; // +-160 - //int dim_h = (random_val + (init_h / 32 - 5)) * 32; // +-160 + //if (get_current_batch(net)+100 > net.max_batches) dim = 544; - float random_val = rand_scale(1.4); // *x or /x - int dim_w = roundl(random_val*init_w / 32) * 32; - int dim_h = roundl(random_val*init_h / 32) * 32; + //int random_val = rand() % 12; + //int dim_w = (random_val + (init_w / 32 - 5)) * 32; // +-160 + //int dim_h = (random_val + (init_h / 32 - 5)) * 32; // +-160 - if (dim_w < 32) dim_w = 32; - if (dim_h < 32) dim_h = 32; + float random_val = rand_scale(1.4); // *x or /x + int dim_w = roundl(random_val*init_w / 32) * 32; + int dim_h = roundl(random_val*init_h / 32) * 32; - printf("%d x %d \n", dim_w, dim_h); - args.w = dim_w; - args.h = dim_h; + if (dim_w < 32) dim_w = 32; + if (dim_h < 32) dim_h = 32; + + printf("%d x %d \n", dim_w, dim_h); + args.w = dim_w; + args.h = dim_h; pthread_join(load_thread, 0); train = buffer; @@ -190,28 +190,28 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i #else loss = train_network(net, train); #endif - if (avg_loss < 0 || avg_loss != avg_loss) avg_loss = loss; // if(-inf or nan) + if (avg_loss < 0 || avg_loss != avg_loss) avg_loss = loss; // if(-inf or nan) avg_loss = avg_loss*.9 + loss*.1; i = get_current_batch(net); printf("\n %d: %f, %f avg loss, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), (what_time_is_it_now()-time), i*imgs); #ifdef OPENCV - if(!dont_show) - draw_train_loss(img, img_size, avg_loss, max_img_loss, i, net.max_batches); -#endif // OPENCV + if(!dont_show) + draw_train_loss(img, img_size, avg_loss, max_img_loss, i, net.max_batches); +#endif // OPENCV - //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { - //if (i % 100 == 0) { - if(i >= (iter_save + 100)) { - iter_save = i; + //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { + //if (i % 100 == 0) { + if(i >= (iter_save + 100)) { + iter_save = i; #ifdef GPU - if (ngpus != 1) sync_nets(nets, ngpus, 0); + if (ngpus != 1) sync_nets(nets, ngpus, 0); #endif - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } free_data(train); } #ifdef GPU @@ -222,870 +222,870 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i save_weights(net, buff); #ifdef OPENCV - cvReleaseImage(&img); - cvDestroyAllWindows(); + cvReleaseImage(&img); + cvDestroyAllWindows(); #endif - // free memory - pthread_join(load_thread, 0); - free_data(buffer); + // free memory + pthread_join(load_thread, 0); + free_data(buffer); - free(base); - free(paths); - free_list_contents(plist); - free_list(plist); + free(base); + free(paths); + free_list_contents(plist); + free_list(plist); - free_list_contents_kvp(options); - free_list(options); + free_list_contents_kvp(options); + free_list(options); - free(nets); - free_network(net); + free(nets); + free_network(net); } static int get_coco_image_id(char *filename) { - char *p = strrchr(filename, '/'); - char *c = strrchr(filename, '_'); - if (c) p = c; - return atoi(p + 1); + char *p = strrchr(filename, '/'); + char *c = strrchr(filename, '_'); + if (c) p = c; + return atoi(p + 1); } static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) { - int i, j; - int image_id = get_coco_image_id(image_path); - for (i = 0; i < num_boxes; ++i) { - float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; - float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; - float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; - float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; + int i, j; + int image_id = get_coco_image_id(image_path); + for (i = 0; i < num_boxes; ++i) { + float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; - if (xmin < 0) xmin = 0; - if (ymin < 0) ymin = 0; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; - float bx = xmin; - float by = ymin; - float bw = xmax - xmin; - float bh = ymax - ymin; + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; - for (j = 0; j < classes; ++j) { - if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); - } - } + for (j = 0; j < classes; ++j) { + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } } void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) { - int i, j; - for (i = 0; i < total; ++i) { - float xmin = dets[i].bbox.x - dets[i].bbox.w / 2. + 1; - float xmax = dets[i].bbox.x + dets[i].bbox.w / 2. + 1; - float ymin = dets[i].bbox.y - dets[i].bbox.h / 2. + 1; - float ymax = dets[i].bbox.y + dets[i].bbox.h / 2. + 1; + int i, j; + for (i = 0; i < total; ++i) { + float xmin = dets[i].bbox.x - dets[i].bbox.w / 2. + 1; + float xmax = dets[i].bbox.x + dets[i].bbox.w / 2. + 1; + float ymin = dets[i].bbox.y - dets[i].bbox.h / 2. + 1; + float ymax = dets[i].bbox.y + dets[i].bbox.h / 2. + 1; - if (xmin < 1) xmin = 1; - if (ymin < 1) ymin = 1; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; + if (xmin < 1) xmin = 1; + if (ymin < 1) ymin = 1; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; - for (j = 0; j < classes; ++j) { - if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], - xmin, ymin, xmax, ymax); - } - } + for (j = 0; j < classes; ++j) { + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } } void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) { - int i, j; - for (i = 0; i < total; ++i) { - float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; - float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; - float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; - float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; + int i, j; + for (i = 0; i < total; ++i) { + float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; - if (xmin < 0) xmin = 0; - if (ymin < 0) ymin = 0; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; - for (j = 0; j < classes; ++j) { - int class = j; - if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[class], - xmin, ymin, xmax, ymax); - } - } + for (j = 0; j < classes; ++j) { + int class = j; + if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[class], + xmin, ymin, xmax, ymax); + } + } } void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) { - int j; - list *options = read_data_cfg(datacfg); - char *valid_images = option_find_str(options, "valid", "data/train.list"); - char *name_list = option_find_str(options, "names", "data/names.list"); - char *prefix = option_find_str(options, "results", "results"); - char **names = get_labels(name_list); - char *mapf = option_find_str(options, "map", 0); - int *map = 0; - if (mapf) map = read_map(mapf); + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); - network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 - if (weightfile) { - load_weights(&net, weightfile); - } - //set_batch_network(&net, 1); - fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - srand(time(0)); + network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 + if (weightfile) { + load_weights(&net, weightfile); + } + //set_batch_network(&net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + srand(time(0)); - list *plist = get_paths(valid_images); - char **paths = (char **)list_to_array(plist); + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); - layer l = net.layers[net.n - 1]; - int classes = l.classes; + layer l = net.layers[net.n - 1]; + int classes = l.classes; - char buff[1024]; - char *type = option_find_str(options, "eval", "voc"); - FILE *fp = 0; - FILE **fps = 0; - int coco = 0; - int imagenet = 0; - if (0 == strcmp(type, "coco")) { - if (!outfile) outfile = "coco_results"; - snprintf(buff, 1024, "%s/%s.json", prefix, outfile); - fp = fopen(buff, "w"); - fprintf(fp, "[\n"); - coco = 1; - } - else if (0 == strcmp(type, "imagenet")) { - if (!outfile) outfile = "imagenet-detection"; - snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); - fp = fopen(buff, "w"); - imagenet = 1; - classes = 200; - } - else { - if (!outfile) outfile = "comp4_det_test_"; - fps = calloc(classes, sizeof(FILE *)); - for (j = 0; j < classes; ++j) { - snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); - fps[j] = fopen(buff, "w"); - } - } + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if (0 == strcmp(type, "coco")) { + if (!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } + else if (0 == strcmp(type, "imagenet")) { + if (!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } + else { + if (!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for (j = 0; j < classes; ++j) { + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } - int m = plist->size; - int i = 0; - int t; + int m = plist->size; + int i = 0; + int t; - float thresh = .005; - float nms = .45; + float thresh = .005; + float nms = .45; - int nthreads = 4; - image *val = calloc(nthreads, sizeof(image)); - image *val_resized = calloc(nthreads, sizeof(image)); - image *buf = calloc(nthreads, sizeof(image)); - image *buf_resized = calloc(nthreads, sizeof(image)); - pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); - load_args args = { 0 }; - args.w = net.w; - args.h = net.h; - args.c = net.c; - args.type = IMAGE_DATA; - //args.type = LETTERBOX_DATA; + load_args args = { 0 }; + args.w = net.w; + args.h = net.h; + args.c = net.c; + args.type = IMAGE_DATA; + //args.type = LETTERBOX_DATA; - for (t = 0; t < nthreads; ++t) { - args.path = paths[i + t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - time_t start = time(0); - for (i = nthreads; i < m + nthreads; i += nthreads) { - fprintf(stderr, "%d\n", i); - for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { - pthread_join(thr[t], 0); - val[t] = buf[t]; - val_resized[t] = buf_resized[t]; - } - for (t = 0; t < nthreads && i + t < m; ++t) { - args.path = paths[i + t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { - char *path = paths[i + t - nthreads]; - char *id = basecfg(path); - float *X = val_resized[t].data; - network_predict(net, X); - int w = val[t].w; - int h = val[t].h; - int nboxes = 0; - int letterbox = (args.type == LETTERBOX_DATA); - detection *dets = get_network_boxes(&net, w, h, thresh, .5, map, 0, &nboxes, letterbox); - if (nms) do_nms_sort(dets, nboxes, classes, nms); - if (coco) { - print_cocos(fp, path, dets, nboxes, classes, w, h); - } - else if (imagenet) { - print_imagenet_detections(fp, i + t - nthreads + 1, dets, nboxes, classes, w, h); - } - else { - print_detector_detections(fps, id, dets, nboxes, classes, w, h); - } - free_detections(dets, nboxes); - free(id); - free_image(val[t]); - free_image(val_resized[t]); - } - } - for (j = 0; j < classes; ++j) { - if (fps) fclose(fps[j]); - } - if (coco) { - fseek(fp, -2, SEEK_CUR); - fprintf(fp, "\n]\n"); - fclose(fp); - } - fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)time(0) - start); + for (t = 0; t < nthreads; ++t) { + args.path = paths[i + t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for (i = nthreads; i < m + nthreads; i += nthreads) { + fprintf(stderr, "%d\n", i); + for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for (t = 0; t < nthreads && i + t < m; ++t) { + args.path = paths[i + t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { + char *path = paths[i + t - nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + int letterbox = (args.type == LETTERBOX_DATA); + detection *dets = get_network_boxes(&net, w, h, thresh, .5, map, 0, &nboxes, letterbox); + if (nms) do_nms_sort(dets, nboxes, classes, nms); + if (coco) { + print_cocos(fp, path, dets, nboxes, classes, w, h); + } + else if (imagenet) { + print_imagenet_detections(fp, i + t - nthreads + 1, dets, nboxes, classes, w, h); + } + else { + print_detector_detections(fps, id, dets, nboxes, classes, w, h); + } + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for (j = 0; j < classes; ++j) { + if (fps) fclose(fps[j]); + } + if (coco) { + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)time(0) - start); } void validate_detector_recall(char *datacfg, char *cfgfile, char *weightfile) { - network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 - if (weightfile) { - load_weights(&net, weightfile); - } - //set_batch_network(&net, 1); - fuse_conv_batchnorm(net); - srand(time(0)); + network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 + if (weightfile) { + load_weights(&net, weightfile); + } + //set_batch_network(&net, 1); + fuse_conv_batchnorm(net); + srand(time(0)); - //list *plist = get_paths("data/coco_val_5k.list"); - list *options = read_data_cfg(datacfg); - char *valid_images = option_find_str(options, "valid", "data/train.txt"); - list *plist = get_paths(valid_images); - char **paths = (char **)list_to_array(plist); + //list *plist = get_paths("data/coco_val_5k.list"); + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.txt"); + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); - layer l = net.layers[net.n - 1]; + layer l = net.layers[net.n - 1]; - int j, k; + int j, k; - int m = plist->size; - int i = 0; + int m = plist->size; + int i = 0; - float thresh = .001; - float iou_thresh = .5; - float nms = .4; + float thresh = .001; + float iou_thresh = .5; + float nms = .4; - int total = 0; - int correct = 0; - int proposals = 0; - float avg_iou = 0; + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; - for (i = 0; i < m; ++i) { - char *path = paths[i]; - image orig = load_image(path, 0, 0, net.c); - image sized = resize_image(orig, net.w, net.h); - char *id = basecfg(path); - network_predict(net, sized.data); - int nboxes = 0; - int letterbox = 0; - detection *dets = get_network_boxes(&net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes, letterbox); - if (nms) do_nms_obj(dets, nboxes, 1, nms); + for (i = 0; i < m; ++i) { + char *path = paths[i]; + image orig = load_image(path, 0, 0, net.c); + image sized = resize_image(orig, net.w, net.h); + char *id = basecfg(path); + network_predict(net, sized.data); + int nboxes = 0; + int letterbox = 0; + detection *dets = get_network_boxes(&net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes, letterbox); + if (nms) do_nms_obj(dets, nboxes, 1, nms); - char labelpath[4096]; - replace_image_to_label(path, labelpath); + char labelpath[4096]; + replace_image_to_label(path, labelpath); - int num_labels = 0; - box_label *truth = read_boxes(labelpath, &num_labels); - for (k = 0; k < nboxes; ++k) { - if (dets[k].objectness > thresh) { - ++proposals; - } - } - for (j = 0; j < num_labels; ++j) { - ++total; - box t = { truth[j].x, truth[j].y, truth[j].w, truth[j].h }; - float best_iou = 0; - for (k = 0; k < nboxes; ++k) { - float iou = box_iou(dets[k].bbox, t); - if (dets[k].objectness > thresh && iou > best_iou) { - best_iou = iou; - } - } - avg_iou += best_iou; - if (best_iou > iou_thresh) { - ++correct; - } - } - //fprintf(stderr, " %s - %s - ", paths[i], labelpath); - fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals / (i + 1), avg_iou * 100 / total, 100.*correct / total); - free(id); - free_image(orig); - free_image(sized); - } + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for (k = 0; k < nboxes; ++k) { + if (dets[k].objectness > thresh) { + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = { truth[j].x, truth[j].y, truth[j].w, truth[j].h }; + float best_iou = 0; + for (k = 0; k < nboxes; ++k) { + float iou = box_iou(dets[k].bbox, t); + if (dets[k].objectness > thresh && iou > best_iou) { + best_iou = iou; + } + } + avg_iou += best_iou; + if (best_iou > iou_thresh) { + ++correct; + } + } + //fprintf(stderr, " %s - %s - ", paths[i], labelpath); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals / (i + 1), avg_iou * 100 / total, 100.*correct / total); + free(id); + free_image(orig); + free_image(sized); + } } typedef struct { - box b; - float p; - int class_id; - int image_index; - int truth_flag; - int unique_truth_index; + box b; + float p; + int class_id; + int image_index; + int truth_flag; + int unique_truth_index; } box_prob; int detections_comparator(const void *pa, const void *pb) { - box_prob a = *(box_prob *)pa; - box_prob b = *(box_prob *)pb; - float diff = a.p - b.p; - if (diff < 0) return 1; - else if (diff > 0) return -1; - return 0; + box_prob a = *(box_prob *)pa; + box_prob b = *(box_prob *)pb; + float diff = a.p - b.p; + if (diff < 0) return 1; + else if (diff > 0) return -1; + return 0; } void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou) { - int j; - list *options = read_data_cfg(datacfg); - char *valid_images = option_find_str(options, "valid", "data/train.txt"); - char *difficult_valid_images = option_find_str(options, "difficult", NULL); - char *name_list = option_find_str(options, "names", "data/names.list"); - char **names = get_labels(name_list); - char *mapf = option_find_str(options, "map", 0); - int *map = 0; - if (mapf) map = read_map(mapf); - FILE* reinforcement_fd = NULL; + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.txt"); + char *difficult_valid_images = option_find_str(options, "difficult", NULL); + char *name_list = option_find_str(options, "names", "data/names.list"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + FILE* reinforcement_fd = NULL; - network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 - if (weightfile) { - load_weights(&net, weightfile); - } - //set_batch_network(&net, 1); - fuse_conv_batchnorm(net); - srand(time(0)); + network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 + if (weightfile) { + load_weights(&net, weightfile); + } + //set_batch_network(&net, 1); + fuse_conv_batchnorm(net); + srand(time(0)); - list *plist = get_paths(valid_images); - char **paths = (char **)list_to_array(plist); + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); - char **paths_dif = NULL; - if (difficult_valid_images) { - list *plist_dif = get_paths(difficult_valid_images); - paths_dif = (char **)list_to_array(plist_dif); - } - - - layer l = net.layers[net.n - 1]; - int classes = l.classes; - - int m = plist->size; - int i = 0; - int t; - - const float thresh = .005; - const float nms = .45; - const float iou_thresh = 0.5; - - int nthreads = 4; - image *val = calloc(nthreads, sizeof(image)); - image *val_resized = calloc(nthreads, sizeof(image)); - image *buf = calloc(nthreads, sizeof(image)); - image *buf_resized = calloc(nthreads, sizeof(image)); - pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); - - load_args args = { 0 }; - args.w = net.w; - args.h = net.h; - args.c = net.c; - args.type = IMAGE_DATA; - //args.type = LETTERBOX_DATA; - - //const float thresh_calc_avg_iou = 0.24; - float avg_iou = 0; - int tp_for_thresh = 0; - int fp_for_thresh = 0; - - box_prob *detections = calloc(1, sizeof(box_prob)); - int detections_count = 0; - int unique_truth_count = 0; - - int *truth_classes_count = calloc(classes, sizeof(int)); - - for (t = 0; t < nthreads; ++t) { - args.path = paths[i + t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - time_t start = time(0); - for (i = nthreads; i < m + nthreads; i += nthreads) { - fprintf(stderr, "%d\n", i); - for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { - pthread_join(thr[t], 0); - val[t] = buf[t]; - val_resized[t] = buf_resized[t]; - } - for (t = 0; t < nthreads && i + t < m; ++t) { - args.path = paths[i + t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { - const int image_index = i + t - nthreads; - char *path = paths[image_index]; - char *id = basecfg(path); - float *X = val_resized[t].data; - network_predict(net, X); - - int nboxes = 0; - int letterbox = (args.type == LETTERBOX_DATA); - float hier_thresh = 0; - detection *dets = get_network_boxes(&net, 1, 1, thresh, hier_thresh, 0, 0, &nboxes, letterbox); - //detection *dets = get_network_boxes(&net, val[t].w, val[t].h, thresh, hier_thresh, 0, 1, &nboxes, letterbox); // for letterbox=1 - if (nms) do_nms_sort(dets, nboxes, l.classes, nms); - - char labelpath[4096]; - replace_image_to_label(path, labelpath); - int num_labels = 0; - box_label *truth = read_boxes(labelpath, &num_labels); - int i, j; - for (j = 0; j < num_labels; ++j) { - truth_classes_count[truth[j].id]++; - } - - // difficult - box_label *truth_dif = NULL; - int num_labels_dif = 0; - if (paths_dif) - { - char *path_dif = paths_dif[image_index]; - - char labelpath_dif[4096]; - replace_image_to_label(path_dif, labelpath_dif); - - truth_dif = read_boxes(labelpath_dif, &num_labels_dif); - } - - const int checkpoint_detections_count = detections_count; - - for (i = 0; i < nboxes; ++i) { - - int class_id; - for (class_id = 0; class_id < classes; ++class_id) { - float prob = dets[i].prob[class_id]; - if (prob > 0) { - detections_count++; - detections = realloc(detections, detections_count * sizeof(box_prob)); - detections[detections_count - 1].b = dets[i].bbox; - detections[detections_count - 1].p = prob; - detections[detections_count - 1].image_index = image_index; - detections[detections_count - 1].class_id = class_id; - detections[detections_count - 1].truth_flag = 0; - detections[detections_count - 1].unique_truth_index = -1; - - int truth_index = -1; - float max_iou = 0; - for (j = 0; j < num_labels; ++j) - { - box t = { truth[j].x, truth[j].y, truth[j].w, truth[j].h }; - //printf(" IoU = %f, prob = %f, class_id = %d, truth[j].id = %d \n", - // box_iou(dets[i].bbox, t), prob, class_id, truth[j].id); - float current_iou = box_iou(dets[i].bbox, t); - if (current_iou > iou_thresh && class_id == truth[j].id) { - if (current_iou > max_iou) { - max_iou = current_iou; - truth_index = unique_truth_count + j; - } - } - } - - // best IoU - if (truth_index > -1) { - detections[detections_count - 1].truth_flag = 1; - detections[detections_count - 1].unique_truth_index = truth_index; - } - else { - // if object is difficult then remove detection - for (j = 0; j < num_labels_dif; ++j) { - box t = { truth_dif[j].x, truth_dif[j].y, truth_dif[j].w, truth_dif[j].h }; - float current_iou = box_iou(dets[i].bbox, t); - if (current_iou > iou_thresh && class_id == truth_dif[j].id) { - --detections_count; - break; - } - } - } - - // calc avg IoU, true-positives, false-positives for required Threshold - if (prob > thresh_calc_avg_iou) { - int z, found = 0; - for (z = checkpoint_detections_count; z < detections_count-1; ++z) - if (detections[z].unique_truth_index == truth_index) { - found = 1; break; - } - - if(truth_index > -1 && found == 0) { - avg_iou += max_iou; - ++tp_for_thresh; - } - else - fp_for_thresh++; - } - } - } - } - - unique_truth_count += num_labels; - - //static int previous_errors = 0; - //int total_errors = fp_for_thresh + (unique_truth_count - tp_for_thresh); - //int errors_in_this_image = total_errors - previous_errors; - //previous_errors = total_errors; - //if(reinforcement_fd == NULL) reinforcement_fd = fopen("reinforcement.txt", "wb"); - //char buff[1000]; - //sprintf(buff, "%s\n", path); - //if(errors_in_this_image > 0) fwrite(buff, sizeof(char), strlen(buff), reinforcement_fd); - - free_detections(dets, nboxes); - free(id); - free_image(val[t]); - free_image(val_resized[t]); - } - } - - if((tp_for_thresh + fp_for_thresh) > 0) - avg_iou = avg_iou / (tp_for_thresh + fp_for_thresh); - - - // SORT(detections) - qsort(detections, detections_count, sizeof(box_prob), detections_comparator); - - typedef struct { - double precision; - double recall; - int tp, fp, fn; - } pr_t; - - // for PR-curve - pr_t **pr = calloc(classes, sizeof(pr_t*)); - for (i = 0; i < classes; ++i) { - pr[i] = calloc(detections_count, sizeof(pr_t)); - } - printf("detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count); + char **paths_dif = NULL; + if (difficult_valid_images) { + list *plist_dif = get_paths(difficult_valid_images); + paths_dif = (char **)list_to_array(plist_dif); + } - int *truth_flags = calloc(unique_truth_count, sizeof(int)); + layer l = net.layers[net.n - 1]; + int classes = l.classes; - int rank; - for (rank = 0; rank < detections_count; ++rank) { - if(rank % 100 == 0) - printf(" rank = %d of ranks = %d \r", rank, detections_count); + int m = plist->size; + int i = 0; + int t; - if (rank > 0) { - int class_id; - for (class_id = 0; class_id < classes; ++class_id) { - pr[class_id][rank].tp = pr[class_id][rank - 1].tp; - pr[class_id][rank].fp = pr[class_id][rank - 1].fp; - } - } + const float thresh = .005; + const float nms = .45; + const float iou_thresh = 0.5; - box_prob d = detections[rank]; - // if (detected && isn't detected before) - if (d.truth_flag == 1) { - if (truth_flags[d.unique_truth_index] == 0) - { - truth_flags[d.unique_truth_index] = 1; - pr[d.class_id][rank].tp++; // true-positive - } - } - else { - pr[d.class_id][rank].fp++; // false-positive - } + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); - for (i = 0; i < classes; ++i) - { - const int tp = pr[i][rank].tp; - const int fp = pr[i][rank].fp; - const int fn = truth_classes_count[i] - tp; // false-negative = objects - true-positive - pr[i][rank].fn = fn; + load_args args = { 0 }; + args.w = net.w; + args.h = net.h; + args.c = net.c; + args.type = IMAGE_DATA; + //args.type = LETTERBOX_DATA; - if ((tp + fp) > 0) pr[i][rank].precision = (double)tp / (double)(tp + fp); - else pr[i][rank].precision = 0; + //const float thresh_calc_avg_iou = 0.24; + float avg_iou = 0; + int tp_for_thresh = 0; + int fp_for_thresh = 0; - if ((tp + fn) > 0) pr[i][rank].recall = (double)tp / (double)(tp + fn); - else pr[i][rank].recall = 0; - } - } + box_prob *detections = calloc(1, sizeof(box_prob)); + int detections_count = 0; + int unique_truth_count = 0; - free(truth_flags); - - - double mean_average_precision = 0; + int *truth_classes_count = calloc(classes, sizeof(int)); - for (i = 0; i < classes; ++i) { - double avg_precision = 0; - int point; - for (point = 0; point < 11; ++point) { - double cur_recall = point * 0.1; - double cur_precision = 0; - for (rank = 0; rank < detections_count; ++rank) - { - if (pr[i][rank].recall >= cur_recall) { // > or >= - if (pr[i][rank].precision > cur_precision) { - cur_precision = pr[i][rank].precision; - } - } - } - //printf("class_id = %d, point = %d, cur_recall = %.4f, cur_precision = %.4f \n", i, point, cur_recall, cur_precision); + for (t = 0; t < nthreads; ++t) { + args.path = paths[i + t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for (i = nthreads; i < m + nthreads; i += nthreads) { + fprintf(stderr, "%d\n", i); + for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for (t = 0; t < nthreads && i + t < m; ++t) { + args.path = paths[i + t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { + const int image_index = i + t - nthreads; + char *path = paths[image_index]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); - avg_precision += cur_precision; - } - avg_precision = avg_precision / 11; - printf("class_id = %d, name = %s, \t ap = %2.2f %% \n", i, names[i], avg_precision*100); - mean_average_precision += avg_precision; - } - - const float cur_precision = (float)tp_for_thresh / ((float)tp_for_thresh + (float)fp_for_thresh); - const float cur_recall = (float)tp_for_thresh / ((float)tp_for_thresh + (float)(unique_truth_count - tp_for_thresh)); - const float f1_score = 2.F * cur_precision * cur_recall / (cur_precision + cur_recall); - printf(" for thresh = %1.2f, precision = %1.2f, recall = %1.2f, F1-score = %1.2f \n", - thresh_calc_avg_iou, cur_precision, cur_recall, f1_score); + int nboxes = 0; + int letterbox = (args.type == LETTERBOX_DATA); + float hier_thresh = 0; + detection *dets = get_network_boxes(&net, 1, 1, thresh, hier_thresh, 0, 0, &nboxes, letterbox); + //detection *dets = get_network_boxes(&net, val[t].w, val[t].h, thresh, hier_thresh, 0, 1, &nboxes, letterbox); // for letterbox=1 + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); - printf(" for thresh = %0.2f, TP = %d, FP = %d, FN = %d, average IoU = %2.2f %% \n", - thresh_calc_avg_iou, tp_for_thresh, fp_for_thresh, unique_truth_count - tp_for_thresh, avg_iou * 100); + char labelpath[4096]; + replace_image_to_label(path, labelpath); + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + int i, j; + for (j = 0; j < num_labels; ++j) { + truth_classes_count[truth[j].id]++; + } - mean_average_precision = mean_average_precision / classes; - printf("\n mean average precision (mAP) = %f, or %2.2f %% \n", mean_average_precision, mean_average_precision*100); + // difficult + box_label *truth_dif = NULL; + int num_labels_dif = 0; + if (paths_dif) + { + char *path_dif = paths_dif[image_index]; + + char labelpath_dif[4096]; + replace_image_to_label(path_dif, labelpath_dif); + + truth_dif = read_boxes(labelpath_dif, &num_labels_dif); + } + + const int checkpoint_detections_count = detections_count; + + for (i = 0; i < nboxes; ++i) { + + int class_id; + for (class_id = 0; class_id < classes; ++class_id) { + float prob = dets[i].prob[class_id]; + if (prob > 0) { + detections_count++; + detections = realloc(detections, detections_count * sizeof(box_prob)); + detections[detections_count - 1].b = dets[i].bbox; + detections[detections_count - 1].p = prob; + detections[detections_count - 1].image_index = image_index; + detections[detections_count - 1].class_id = class_id; + detections[detections_count - 1].truth_flag = 0; + detections[detections_count - 1].unique_truth_index = -1; + + int truth_index = -1; + float max_iou = 0; + for (j = 0; j < num_labels; ++j) + { + box t = { truth[j].x, truth[j].y, truth[j].w, truth[j].h }; + //printf(" IoU = %f, prob = %f, class_id = %d, truth[j].id = %d \n", + // box_iou(dets[i].bbox, t), prob, class_id, truth[j].id); + float current_iou = box_iou(dets[i].bbox, t); + if (current_iou > iou_thresh && class_id == truth[j].id) { + if (current_iou > max_iou) { + max_iou = current_iou; + truth_index = unique_truth_count + j; + } + } + } + + // best IoU + if (truth_index > -1) { + detections[detections_count - 1].truth_flag = 1; + detections[detections_count - 1].unique_truth_index = truth_index; + } + else { + // if object is difficult then remove detection + for (j = 0; j < num_labels_dif; ++j) { + box t = { truth_dif[j].x, truth_dif[j].y, truth_dif[j].w, truth_dif[j].h }; + float current_iou = box_iou(dets[i].bbox, t); + if (current_iou > iou_thresh && class_id == truth_dif[j].id) { + --detections_count; + break; + } + } + } + + // calc avg IoU, true-positives, false-positives for required Threshold + if (prob > thresh_calc_avg_iou) { + int z, found = 0; + for (z = checkpoint_detections_count; z < detections_count-1; ++z) + if (detections[z].unique_truth_index == truth_index) { + found = 1; break; + } + + if(truth_index > -1 && found == 0) { + avg_iou += max_iou; + ++tp_for_thresh; + } + else + fp_for_thresh++; + } + } + } + } + + unique_truth_count += num_labels; + + //static int previous_errors = 0; + //int total_errors = fp_for_thresh + (unique_truth_count - tp_for_thresh); + //int errors_in_this_image = total_errors - previous_errors; + //previous_errors = total_errors; + //if(reinforcement_fd == NULL) reinforcement_fd = fopen("reinforcement.txt", "wb"); + //char buff[1000]; + //sprintf(buff, "%s\n", path); + //if(errors_in_this_image > 0) fwrite(buff, sizeof(char), strlen(buff), reinforcement_fd); + + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + + if((tp_for_thresh + fp_for_thresh) > 0) + avg_iou = avg_iou / (tp_for_thresh + fp_for_thresh); - for (i = 0; i < classes; ++i) { - free(pr[i]); - } - free(pr); - free(detections); - free(truth_classes_count); + // SORT(detections) + qsort(detections, detections_count, sizeof(box_prob), detections_comparator); - fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); - if (reinforcement_fd != NULL) fclose(reinforcement_fd); + typedef struct { + double precision; + double recall; + int tp, fp, fn; + } pr_t; + + // for PR-curve + pr_t **pr = calloc(classes, sizeof(pr_t*)); + for (i = 0; i < classes; ++i) { + pr[i] = calloc(detections_count, sizeof(pr_t)); + } + printf("detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count); + + + int *truth_flags = calloc(unique_truth_count, sizeof(int)); + + int rank; + for (rank = 0; rank < detections_count; ++rank) { + if(rank % 100 == 0) + printf(" rank = %d of ranks = %d \r", rank, detections_count); + + if (rank > 0) { + int class_id; + for (class_id = 0; class_id < classes; ++class_id) { + pr[class_id][rank].tp = pr[class_id][rank - 1].tp; + pr[class_id][rank].fp = pr[class_id][rank - 1].fp; + } + } + + box_prob d = detections[rank]; + // if (detected && isn't detected before) + if (d.truth_flag == 1) { + if (truth_flags[d.unique_truth_index] == 0) + { + truth_flags[d.unique_truth_index] = 1; + pr[d.class_id][rank].tp++; // true-positive + } + } + else { + pr[d.class_id][rank].fp++; // false-positive + } + + for (i = 0; i < classes; ++i) + { + const int tp = pr[i][rank].tp; + const int fp = pr[i][rank].fp; + const int fn = truth_classes_count[i] - tp; // false-negative = objects - true-positive + pr[i][rank].fn = fn; + + if ((tp + fp) > 0) pr[i][rank].precision = (double)tp / (double)(tp + fp); + else pr[i][rank].precision = 0; + + if ((tp + fn) > 0) pr[i][rank].recall = (double)tp / (double)(tp + fn); + else pr[i][rank].recall = 0; + } + } + + free(truth_flags); + + + double mean_average_precision = 0; + + for (i = 0; i < classes; ++i) { + double avg_precision = 0; + int point; + for (point = 0; point < 11; ++point) { + double cur_recall = point * 0.1; + double cur_precision = 0; + for (rank = 0; rank < detections_count; ++rank) + { + if (pr[i][rank].recall >= cur_recall) { // > or >= + if (pr[i][rank].precision > cur_precision) { + cur_precision = pr[i][rank].precision; + } + } + } + //printf("class_id = %d, point = %d, cur_recall = %.4f, cur_precision = %.4f \n", i, point, cur_recall, cur_precision); + + avg_precision += cur_precision; + } + avg_precision = avg_precision / 11; + printf("class_id = %d, name = %s, \t ap = %2.2f %% \n", i, names[i], avg_precision*100); + mean_average_precision += avg_precision; + } + + const float cur_precision = (float)tp_for_thresh / ((float)tp_for_thresh + (float)fp_for_thresh); + const float cur_recall = (float)tp_for_thresh / ((float)tp_for_thresh + (float)(unique_truth_count - tp_for_thresh)); + const float f1_score = 2.F * cur_precision * cur_recall / (cur_precision + cur_recall); + printf(" for thresh = %1.2f, precision = %1.2f, recall = %1.2f, F1-score = %1.2f \n", + thresh_calc_avg_iou, cur_precision, cur_recall, f1_score); + + printf(" for thresh = %0.2f, TP = %d, FP = %d, FN = %d, average IoU = %2.2f %% \n", + thresh_calc_avg_iou, tp_for_thresh, fp_for_thresh, unique_truth_count - tp_for_thresh, avg_iou * 100); + + mean_average_precision = mean_average_precision / classes; + printf("\n mean average precision (mAP) = %f, or %2.2f %% \n", mean_average_precision, mean_average_precision*100); + + + for (i = 0; i < classes; ++i) { + free(pr[i]); + } + free(pr); + free(detections); + free(truth_classes_count); + + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); + if (reinforcement_fd != NULL) fclose(reinforcement_fd); } #ifdef OPENCV typedef struct { - float w, h; + float w, h; } anchors_t; int anchors_comparator(const void *pa, const void *pb) { - anchors_t a = *(anchors_t *)pa; - anchors_t b = *(anchors_t *)pb; - float diff = b.w*b.h - a.w*a.h; - if (diff < 0) return 1; - else if (diff > 0) return -1; - return 0; + anchors_t a = *(anchors_t *)pa; + anchors_t b = *(anchors_t *)pb; + float diff = b.w*b.h - a.w*a.h; + if (diff < 0) return 1; + else if (diff > 0) return -1; + return 0; } void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) { - printf("\n num_of_clusters = %d, width = %d, height = %d \n", num_of_clusters, width, height); - if (width < 0 || height < 0) { - printf("Usage: darknet detector calc_anchors data/voc.data -num_of_clusters 9 -width 416 -height 416 \n"); - printf("Error: set width and height \n"); - return; - } + printf("\n num_of_clusters = %d, width = %d, height = %d \n", num_of_clusters, width, height); + if (width < 0 || height < 0) { + printf("Usage: darknet detector calc_anchors data/voc.data -num_of_clusters 9 -width 416 -height 416 \n"); + printf("Error: set width and height \n"); + return; + } - //float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 }; - float *rel_width_height_array = calloc(1000, sizeof(float)); + //float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 }; + float *rel_width_height_array = calloc(1000, sizeof(float)); - list *options = read_data_cfg(datacfg); - char *train_images = option_find_str(options, "train", "data/train.list"); - list *plist = get_paths(train_images); - int number_of_images = plist->size; - char **paths = (char **)list_to_array(plist); + list *options = read_data_cfg(datacfg); + char *train_images = option_find_str(options, "train", "data/train.list"); + list *plist = get_paths(train_images); + int number_of_images = plist->size; + char **paths = (char **)list_to_array(plist); - int number_of_boxes = 0; - printf(" read labels from %d images \n", number_of_images); + int number_of_boxes = 0; + printf(" read labels from %d images \n", number_of_images); - int i, j; - for (i = 0; i < number_of_images; ++i) { - char *path = paths[i]; - char labelpath[4096]; - replace_image_to_label(path, labelpath); + int i, j; + for (i = 0; i < number_of_images; ++i) { + char *path = paths[i]; + char labelpath[4096]; + replace_image_to_label(path, labelpath); - int num_labels = 0; - box_label *truth = read_boxes(labelpath, &num_labels); - //printf(" new path: %s \n", labelpath); - char buff[1024]; - for (j = 0; j < num_labels; ++j) - { - if (truth[j].x > 1 || truth[j].x <= 0 || truth[j].y > 1 || truth[j].y <= 0 || - truth[j].w > 1 || truth[j].w <= 0 || truth[j].h > 1 || truth[j].h <= 0) - { - printf("\n\nWrong label: %s - j = %d, x = %f, y = %f, width = %f, height = %f \n", - labelpath, j, truth[j].x, truth[j].y, truth[j].w, truth[j].h); - sprintf(buff, "echo \"Wrong label: %s - j = %d, x = %f, y = %f, width = %f, height = %f\" >> bad_label.list", - labelpath, j, truth[j].x, truth[j].y, truth[j].w, truth[j].h); - system(buff); - } - number_of_boxes++; - rel_width_height_array = realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float)); - rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width; - rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height; - printf("\r loaded \t image: %d \t box: %d", i+1, number_of_boxes); - } - } - printf("\n all loaded. \n"); + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + //printf(" new path: %s \n", labelpath); + char buff[1024]; + for (j = 0; j < num_labels; ++j) + { + if (truth[j].x > 1 || truth[j].x <= 0 || truth[j].y > 1 || truth[j].y <= 0 || + truth[j].w > 1 || truth[j].w <= 0 || truth[j].h > 1 || truth[j].h <= 0) + { + printf("\n\nWrong label: %s - j = %d, x = %f, y = %f, width = %f, height = %f \n", + labelpath, j, truth[j].x, truth[j].y, truth[j].w, truth[j].h); + sprintf(buff, "echo \"Wrong label: %s - j = %d, x = %f, y = %f, width = %f, height = %f\" >> bad_label.list", + labelpath, j, truth[j].x, truth[j].y, truth[j].w, truth[j].h); + system(buff); + } + number_of_boxes++; + rel_width_height_array = realloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float)); + rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width; + rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height; + printf("\r loaded \t image: %d \t box: %d", i+1, number_of_boxes); + } + } + printf("\n all loaded. \n"); - CvMat* points = cvCreateMat(number_of_boxes, 2, CV_32FC1); - CvMat* centers = cvCreateMat(num_of_clusters, 2, CV_32FC1); - CvMat* labels = cvCreateMat(number_of_boxes, 1, CV_32SC1); + CvMat* points = cvCreateMat(number_of_boxes, 2, CV_32FC1); + CvMat* centers = cvCreateMat(num_of_clusters, 2, CV_32FC1); + CvMat* labels = cvCreateMat(number_of_boxes, 1, CV_32SC1); - for (i = 0; i < number_of_boxes; ++i) { - points->data.fl[i * 2] = rel_width_height_array[i * 2]; - points->data.fl[i * 2 + 1] = rel_width_height_array[i * 2 + 1]; - //cvSet1D(points, i * 2, cvScalar(rel_width_height_array[i * 2], 0, 0, 0)); - //cvSet1D(points, i * 2 + 1, cvScalar(rel_width_height_array[i * 2 + 1], 0, 0, 0)); - } + for (i = 0; i < number_of_boxes; ++i) { + points->data.fl[i * 2] = rel_width_height_array[i * 2]; + points->data.fl[i * 2 + 1] = rel_width_height_array[i * 2 + 1]; + //cvSet1D(points, i * 2, cvScalar(rel_width_height_array[i * 2], 0, 0, 0)); + //cvSet1D(points, i * 2 + 1, cvScalar(rel_width_height_array[i * 2 + 1], 0, 0, 0)); + } - const int attemps = 10; - double compactness; + const int attemps = 10; + double compactness; - enum { - KMEANS_RANDOM_CENTERS = 0, - KMEANS_USE_INITIAL_LABELS = 1, - KMEANS_PP_CENTERS = 2 - }; - - printf("\n calculating k-means++ ..."); - // Should be used: distance(box, centroid) = 1 - IoU(box, centroid) - cvKMeans2(points, num_of_clusters, labels, - cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10000, 0), attemps, - 0, KMEANS_PP_CENTERS, - centers, &compactness); + enum { + KMEANS_RANDOM_CENTERS = 0, + KMEANS_USE_INITIAL_LABELS = 1, + KMEANS_PP_CENTERS = 2 + }; - // sort anchors - qsort(centers->data.fl, num_of_clusters, 2*sizeof(float), anchors_comparator); + printf("\n calculating k-means++ ..."); + // Should be used: distance(box, centroid) = 1 - IoU(box, centroid) + cvKMeans2(points, num_of_clusters, labels, + cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10000, 0), attemps, + 0, KMEANS_PP_CENTERS, + centers, &compactness); - //orig 2.0 anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 - //float orig_anch[] = { 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 }; - // worse than ours (even for 19x19 final size - for input size 608x608) + // sort anchors + qsort(centers->data.fl, num_of_clusters, 2*sizeof(float), anchors_comparator); - //orig anchors = 1.3221,1.73145, 3.19275,4.00944, 5.05587,8.09892, 9.47112,4.84053, 11.2364,10.0071 - //float orig_anch[] = { 1.3221,1.73145, 3.19275,4.00944, 5.05587,8.09892, 9.47112,4.84053, 11.2364,10.0071 }; - // orig (IoU=59.90%) better than ours (59.75%) + //orig 2.0 anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 + //float orig_anch[] = { 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 }; + // worse than ours (even for 19x19 final size - for input size 608x608) - //gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 - //float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 }; + //orig anchors = 1.3221,1.73145, 3.19275,4.00944, 5.05587,8.09892, 9.47112,4.84053, 11.2364,10.0071 + //float orig_anch[] = { 1.3221,1.73145, 3.19275,4.00944, 5.05587,8.09892, 9.47112,4.84053, 11.2364,10.0071 }; + // orig (IoU=59.90%) better than ours (59.75%) - // ours: anchors = 9.3813,6.0095, 3.3999,5.3505, 10.9476,11.1992, 5.0161,9.8314, 1.5003,2.1595 - //float orig_anch[] = { 9.3813,6.0095, 3.3999,5.3505, 10.9476,11.1992, 5.0161,9.8314, 1.5003,2.1595 }; - //for (i = 0; i < num_of_clusters * 2; ++i) centers->data.fl[i] = orig_anch[i]; - - //for (i = 0; i < number_of_boxes; ++i) - // printf("%2.2f,%2.2f, ", points->data.fl[i * 2], points->data.fl[i * 2 + 1]); + //gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 + //float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 }; - printf("\n"); - float avg_iou = 0; - for (i = 0; i < number_of_boxes; ++i) { - float box_w = points->data.fl[i * 2]; - float box_h = points->data.fl[i * 2 + 1]; - //int cluster_idx = labels->data.i[i]; - int cluster_idx = 0; - float min_dist = FLT_MAX; - for (j = 0; j < num_of_clusters; ++j) { - float anchor_w = centers->data.fl[j * 2]; - float anchor_h = centers->data.fl[j * 2 + 1]; - float w_diff = anchor_w - box_w; - float h_diff = anchor_h - box_h; - float distance = sqrt(w_diff*w_diff + h_diff*h_diff); - if (distance < min_dist) min_dist = distance, cluster_idx = j; - } - - float anchor_w = centers->data.fl[cluster_idx * 2]; - float anchor_h = centers->data.fl[cluster_idx * 2 + 1]; - float min_w = (box_w < anchor_w) ? box_w : anchor_w; - float min_h = (box_h < anchor_h) ? box_h : anchor_h; - float box_intersect = min_w*min_h; - float box_union = box_w*box_h + anchor_w*anchor_h - box_intersect; - float iou = box_intersect / box_union; - if (iou > 1 || iou < 0) { // || box_w > width || box_h > height) { - printf(" Wrong label: i = %d, box_w = %d, box_h = %d, anchor_w = %d, anchor_h = %d, iou = %f \n", - i, box_w, box_h, anchor_w, anchor_h, iou); - } - else avg_iou += iou; - } - avg_iou = 100 * avg_iou / number_of_boxes; - printf("\n avg IoU = %2.2f %% \n", avg_iou); + // ours: anchors = 9.3813,6.0095, 3.3999,5.3505, 10.9476,11.1992, 5.0161,9.8314, 1.5003,2.1595 + //float orig_anch[] = { 9.3813,6.0095, 3.3999,5.3505, 10.9476,11.1992, 5.0161,9.8314, 1.5003,2.1595 }; + //for (i = 0; i < num_of_clusters * 2; ++i) centers->data.fl[i] = orig_anch[i]; - char buff[1024]; - FILE* fw = fopen("anchors.txt", "wb"); - printf("\nSaving anchors to the file: anchors.txt \n"); - printf("anchors = "); - for (i = 0; i < num_of_clusters; ++i) { - sprintf(buff, "%2.4f,%2.4f", centers->data.fl[i * 2], centers->data.fl[i * 2 + 1]); - printf("%s", buff); - fwrite(buff, sizeof(char), strlen(buff), fw); - if (i + 1 < num_of_clusters) { - fwrite(", ", sizeof(char), 2, fw); - printf(", "); - } - } - printf("\n"); - fclose(fw); + //for (i = 0; i < number_of_boxes; ++i) + // printf("%2.2f,%2.2f, ", points->data.fl[i * 2], points->data.fl[i * 2 + 1]); - if (show) { - size_t img_size = 700; - IplImage* img = cvCreateImage(cvSize(img_size, img_size), 8, 3); - cvZero(img); - for (j = 0; j < num_of_clusters; ++j) { - CvPoint pt1, pt2; - pt1.x = pt1.y = 0; - pt2.x = centers->data.fl[j * 2] * img_size / width; - pt2.y = centers->data.fl[j * 2 + 1] * img_size / height; - cvRectangle(img, pt1, pt2, CV_RGB(255, 255, 255), 1, 8, 0); - } + printf("\n"); + float avg_iou = 0; + for (i = 0; i < number_of_boxes; ++i) { + float box_w = points->data.fl[i * 2]; + float box_h = points->data.fl[i * 2 + 1]; + //int cluster_idx = labels->data.i[i]; + int cluster_idx = 0; + float min_dist = FLT_MAX; + for (j = 0; j < num_of_clusters; ++j) { + float anchor_w = centers->data.fl[j * 2]; + float anchor_h = centers->data.fl[j * 2 + 1]; + float w_diff = anchor_w - box_w; + float h_diff = anchor_h - box_h; + float distance = sqrt(w_diff*w_diff + h_diff*h_diff); + if (distance < min_dist) min_dist = distance, cluster_idx = j; + } - for (i = 0; i < number_of_boxes; ++i) { - CvPoint pt; - pt.x = points->data.fl[i * 2] * img_size / width; - pt.y = points->data.fl[i * 2 + 1] * img_size / height; - int cluster_idx = labels->data.i[i]; - int red_id = (cluster_idx * (uint64_t)123 + 55) % 255; - int green_id = (cluster_idx * (uint64_t)321 + 33) % 255; - int blue_id = (cluster_idx * (uint64_t)11 + 99) % 255; - cvCircle(img, pt, 1, CV_RGB(red_id, green_id, blue_id), CV_FILLED, 8, 0); - //if(pt.x > img_size || pt.y > img_size) printf("\n pt.x = %d, pt.y = %d \n", pt.x, pt.y); - } - cvShowImage("clusters", img); - cvWaitKey(0); - cvReleaseImage(&img); - cvDestroyAllWindows(); - } + float anchor_w = centers->data.fl[cluster_idx * 2]; + float anchor_h = centers->data.fl[cluster_idx * 2 + 1]; + float min_w = (box_w < anchor_w) ? box_w : anchor_w; + float min_h = (box_h < anchor_h) ? box_h : anchor_h; + float box_intersect = min_w*min_h; + float box_union = box_w*box_h + anchor_w*anchor_h - box_intersect; + float iou = box_intersect / box_union; + if (iou > 1 || iou < 0) { // || box_w > width || box_h > height) { + printf(" Wrong label: i = %d, box_w = %d, box_h = %d, anchor_w = %d, anchor_h = %d, iou = %f \n", + i, box_w, box_h, anchor_w, anchor_h, iou); + } + else avg_iou += iou; + } + avg_iou = 100 * avg_iou / number_of_boxes; + printf("\n avg IoU = %2.2f %% \n", avg_iou); - free(rel_width_height_array); - cvReleaseMat(&points); - cvReleaseMat(¢ers); - cvReleaseMat(&labels); + char buff[1024]; + FILE* fw = fopen("anchors.txt", "wb"); + printf("\nSaving anchors to the file: anchors.txt \n"); + printf("anchors = "); + for (i = 0; i < num_of_clusters; ++i) { + sprintf(buff, "%2.4f,%2.4f", centers->data.fl[i * 2], centers->data.fl[i * 2 + 1]); + printf("%s", buff); + fwrite(buff, sizeof(char), strlen(buff), fw); + if (i + 1 < num_of_clusters) { + fwrite(", ", sizeof(char), 2, fw); + printf(", "); + } + } + printf("\n"); + fclose(fw); + + if (show) { + size_t img_size = 700; + IplImage* img = cvCreateImage(cvSize(img_size, img_size), 8, 3); + cvZero(img); + for (j = 0; j < num_of_clusters; ++j) { + CvPoint pt1, pt2; + pt1.x = pt1.y = 0; + pt2.x = centers->data.fl[j * 2] * img_size / width; + pt2.y = centers->data.fl[j * 2 + 1] * img_size / height; + cvRectangle(img, pt1, pt2, CV_RGB(255, 255, 255), 1, 8, 0); + } + + for (i = 0; i < number_of_boxes; ++i) { + CvPoint pt; + pt.x = points->data.fl[i * 2] * img_size / width; + pt.y = points->data.fl[i * 2 + 1] * img_size / height; + int cluster_idx = labels->data.i[i]; + int red_id = (cluster_idx * (uint64_t)123 + 55) % 255; + int green_id = (cluster_idx * (uint64_t)321 + 33) % 255; + int blue_id = (cluster_idx * (uint64_t)11 + 99) % 255; + cvCircle(img, pt, 1, CV_RGB(red_id, green_id, blue_id), CV_FILLED, 8, 0); + //if(pt.x > img_size || pt.y > img_size) printf("\n pt.x = %d, pt.y = %d \n", pt.x, pt.y); + } + cvShowImage("clusters", img); + cvWaitKey(0); + cvReleaseImage(&img); + cvDestroyAllWindows(); + } + + free(rel_width_height_array); + cvReleaseMat(&points); + cvReleaseMat(¢ers); + cvReleaseMat(&labels); } #else void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) { - printf(" k-means++ can't be used without OpenCV, because there is used cvKMeans2 implementation \n"); + printf(" k-means++ can't be used without OpenCV, because there is used cvKMeans2 implementation \n"); } #endif // OPENCV void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, - float hier_thresh, int dont_show, int ext_output, int save_labels) + float hier_thresh, int dont_show, int ext_output, int save_labels) { list *options = read_data_cfg(datacfg); char *name_list = option_find_str(options, "names", "data/names.list"); - int names_size = 0; - char **names = get_labels_custom(name_list, &names_size); //get_labels(name_list); + int names_size = 0; + char **names = get_labels_custom(name_list, &names_size); //get_labels(name_list); image **alphabet = load_alphabet(); network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1 @@ -1093,23 +1093,23 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam load_weights(&net, weightfile); } //set_batch_network(&net, 1); - fuse_conv_batchnorm(net); - if (net.layers[net.n - 1].classes != names_size) { - printf(" Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", - name_list, names_size, net.layers[net.n - 1].classes, cfgfile); - if(net.layers[net.n - 1].classes > names_size) getchar(); - } + fuse_conv_batchnorm(net); + if (net.layers[net.n - 1].classes != names_size) { + printf(" Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", + name_list, names_size, net.layers[net.n - 1].classes, cfgfile); + if(net.layers[net.n - 1].classes > names_size) getchar(); + } srand(2222222); double time; char buff[256]; char *input = buff; int j; - float nms=.45; // 0.4F + float nms=.45; // 0.4F while(1){ if(filename){ strncpy(input, filename, 256); - if(strlen(input) > 0) - if (input[strlen(input) - 1] == 0x0d) input[strlen(input) - 1] = 0; + if(strlen(input) > 0) + if (input[strlen(input) - 1] == 0x0d) input[strlen(input) - 1] = 0; } else { printf("Enter Image Path: "); fflush(stdout); @@ -1118,9 +1118,9 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam strtok(input, "\n"); } image im = load_image(input,0,0,net.c); - int letterbox = 0; + int letterbox = 0; image sized = resize_image(im, net.w, net.h); - //image sized = letterbox_image(im, net.w, net.h); letterbox = 1; + //image sized = letterbox_image(im, net.w, net.h); letterbox = 1; layer l = net.layers[net.n-1]; //box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); @@ -1130,97 +1130,97 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam float *X = sized.data; time= what_time_is_it_now(); network_predict(net, X); - //network_predict_image(&net, im); letterbox = 1; + //network_predict_image(&net, im); letterbox = 1; printf("%s: Predicted in %f seconds.\n", input, (what_time_is_it_now()-time)); //get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0); - // if (nms) do_nms_sort_v2(boxes, probs, l.w*l.h*l.n, l.classes, nms); - //draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes); - int nboxes = 0; - detection *dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letterbox); - if (nms) do_nms_sort(dets, nboxes, l.classes, nms); - draw_detections_v3(im, dets, nboxes, thresh, names, alphabet, l.classes, ext_output); + // if (nms) do_nms_sort_v2(boxes, probs, l.w*l.h*l.n, l.classes, nms); + //draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes); + int nboxes = 0; + detection *dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letterbox); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + draw_detections_v3(im, dets, nboxes, thresh, names, alphabet, l.classes, ext_output); save_image(im, "predictions"); - if (!dont_show) { - show_image(im, "predictions"); - } + if (!dont_show) { + show_image(im, "predictions"); + } - // pseudo labeling concept - fast.ai - if(save_labels) - { - char labelpath[4096]; - replace_image_to_label(input, labelpath); + // pseudo labeling concept - fast.ai + if(save_labels) + { + char labelpath[4096]; + replace_image_to_label(input, labelpath); - FILE* fw = fopen(labelpath, "wb"); - int i; - for (i = 0; i < nboxes; ++i) { - char buff[1024]; - int class_id = -1; - float prob = 0; - for (j = 0; j < l.classes; ++j) { - if (dets[i].prob[j] > thresh && dets[i].prob[j] > prob) { - prob = dets[i].prob[j]; - class_id = j; - } - } - if (class_id >= 0) { - sprintf(buff, "%d %2.4f %2.4f %2.4f %2.4f\n", class_id, dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h); - fwrite(buff, sizeof(char), strlen(buff), fw); - } - } - fclose(fw); - } + FILE* fw = fopen(labelpath, "wb"); + int i; + for (i = 0; i < nboxes; ++i) { + char buff[1024]; + int class_id = -1; + float prob = 0; + for (j = 0; j < l.classes; ++j) { + if (dets[i].prob[j] > thresh && dets[i].prob[j] > prob) { + prob = dets[i].prob[j]; + class_id = j; + } + } + if (class_id >= 0) { + sprintf(buff, "%d %2.4f %2.4f %2.4f %2.4f\n", class_id, dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h); + fwrite(buff, sizeof(char), strlen(buff), fw); + } + } + fclose(fw); + } - free_detections(dets, nboxes); + free_detections(dets, nboxes); free_image(im); free_image(sized); //free(boxes); //free_ptrs((void **)probs, l.w*l.h*l.n); #ifdef OPENCV - if (!dont_show) { - cvWaitKey(0); - cvDestroyAllWindows(); - } + if (!dont_show) { + cvWaitKey(0); + cvDestroyAllWindows(); + } #endif if (filename) break; } - // free memory - free_ptrs(names, net.layers[net.n - 1].classes); - free_list_contents_kvp(options); - free_list(options); + // free memory + free_ptrs(names, net.layers[net.n - 1].classes); + free_list_contents_kvp(options); + free_list(options); - int i; - const int nsize = 8; - for (j = 0; j < nsize; ++j) { - for (i = 32; i < 127; ++i) { - free_image(alphabet[j][i]); - } - free(alphabet[j]); - } - free(alphabet); + int i; + const int nsize = 8; + for (j = 0; j < nsize; ++j) { + for (i = 32; i < 127; ++i) { + free_image(alphabet[j][i]); + } + free(alphabet[j]); + } + free(alphabet); - free_network(net); + free_network(net); } void run_detector(int argc, char **argv) { - int dont_show = find_arg(argc, argv, "-dont_show"); - int show = find_arg(argc, argv, "-show"); - int http_stream_port = find_int_arg(argc, argv, "-http_port", -1); - char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); - char *outfile = find_char_arg(argc, argv, "-out", 0); + int dont_show = find_arg(argc, argv, "-dont_show"); + int show = find_arg(argc, argv, "-show"); + int http_stream_port = find_int_arg(argc, argv, "-http_port", -1); + char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); + char *outfile = find_char_arg(argc, argv, "-out", 0); char *prefix = find_char_arg(argc, argv, "-prefix", 0); - float thresh = find_float_arg(argc, argv, "-thresh", .25); // 0.24 - float hier_thresh = find_float_arg(argc, argv, "-hier", .5); + float thresh = find_float_arg(argc, argv, "-thresh", .25); // 0.24 + float hier_thresh = find_float_arg(argc, argv, "-hier", .5); int cam_index = find_int_arg(argc, argv, "-c", 0); int frame_skip = find_int_arg(argc, argv, "-s", 0); - int num_of_clusters = find_int_arg(argc, argv, "-num_of_clusters", 5); - int width = find_int_arg(argc, argv, "-width", -1); - int height = find_int_arg(argc, argv, "-height", -1); - // extended output in test mode (output of rect bound coords) - // and for recall mode (extended output table-like format with results for best_class fit) - int ext_output = find_arg(argc, argv, "-ext_output"); - int save_labels = find_arg(argc, argv, "-save_labels"); + int num_of_clusters = find_int_arg(argc, argv, "-num_of_clusters", 5); + int width = find_int_arg(argc, argv, "-width", -1); + int height = find_int_arg(argc, argv, "-height", -1); + // extended output in test mode (output of rect bound coords) + // and for recall mode (extended output table-like format with results for best_class fit) + int ext_output = find_arg(argc, argv, "-ext_output"); + int save_labels = find_arg(argc, argv, "-save_labels"); if(argc < 4){ fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; @@ -1253,29 +1253,29 @@ void run_detector(int argc, char **argv) char *datacfg = argv[3]; char *cfg = argv[4]; char *weights = (argc > 5) ? argv[5] : 0; - if(weights) - if(strlen(weights) > 0) - if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0; + if(weights) + if(strlen(weights) > 0) + if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0; char *filename = (argc > 6) ? argv[6]: 0; if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels); else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show); else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); else if(0==strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights); - else if(0==strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh); - else if(0==strcmp(argv[2], "calc_anchors")) calc_anchors(datacfg, num_of_clusters, width, height, show); + else if(0==strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh); + else if(0==strcmp(argv[2], "calc_anchors")) calc_anchors(datacfg, num_of_clusters, width, height, show); else if(0==strcmp(argv[2], "demo")) { list *options = read_data_cfg(datacfg); int classes = option_find_int(options, "classes", 20); char *name_list = option_find_str(options, "names", "data/names.list"); char **names = get_labels(name_list); - if(filename) - if(strlen(filename) > 0) - if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0; + if(filename) + if(strlen(filename) > 0) + if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0; demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, frame_skip, prefix, out_filename, - http_stream_port, dont_show, ext_output); + http_stream_port, dont_show, ext_output); - free_list_contents_kvp(options); - free_list(options); + free_list_contents_kvp(options); + free_list(options); } - else printf(" There isn't such command: %s", argv[2]); + else printf(" There isn't such command: %s", argv[2]); } diff --git a/src/gemm.c b/src/gemm.c index 506687be..2b90b059 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -87,7 +87,7 @@ void gemm(int TA, int TB, int M, int N, int K, float ALPHA, #include #include -#else // Linux GCC/Clang +#else // Linux GCC/Clang #include #include #include @@ -96,124 +96,124 @@ void gemm(int TA, int TB, int M, int N, int K, float ALPHA, void asm_cpuid(uint32_t* abcd, uint32_t eax) { - uint32_t ebx = 0, edx = 0, ecx = 0; + uint32_t ebx = 0, edx = 0, ecx = 0; - // EBX is saved to EDI and later restored - __asm__("movl %%ebx, %%edi;" - "cpuid;" - "xchgl %%ebx, %%edi;" - : "=D"(ebx), - "+a"(eax), "+c"(ecx), "=d"(edx)); + // EBX is saved to EDI and later restored + __asm__("movl %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=D"(ebx), + "+a"(eax), "+c"(ecx), "=d"(edx)); - abcd[0] = eax; - abcd[1] = ebx; - abcd[2] = ecx; - abcd[3] = edx; + abcd[0] = eax; + abcd[1] = ebx; + abcd[2] = ecx; + abcd[3] = edx; } #endif int simd_detect_x86(unsigned int idFeature) { - uint32_t regs[4]; // EAX, EBX, ECX, EDX; + uint32_t regs[4]; // EAX, EBX, ECX, EDX; #ifdef _WIN32 - __cpuid(regs, 0); - if (regs[0] > 1U) __cpuid(regs, 1); + __cpuid(regs, 0); + if (regs[0] > 1U) __cpuid(regs, 1); #else - __get_cpuid(0, ®s[0], ®s[1], ®s[2], ®s[3]); - if(regs[0] > 1U) __get_cpuid(1, ®s[0], ®s[1], ®s[2], ®s[3]); + __get_cpuid(0, ®s[0], ®s[1], ®s[2], ®s[3]); + if(regs[0] > 1U) __get_cpuid(1, ®s[0], ®s[1], ®s[2], ®s[3]); #endif - if ((regs[2] & idFeature) != idFeature) - return 0; - return 1; + if ((regs[2] & idFeature) != idFeature) + return 0; + return 1; } int is_fma_avx() { - static int result = -1; - if (result == -1) { - result = simd_detect_x86(AVXFlag); - if (result == 1) printf(" Used AVX \n"); - else printf(" Not used AVX \n"); - } - return result; + static int result = -1; + if (result == -1) { + result = simd_detect_x86(AVXFlag); + if (result == 1) printf(" Used AVX \n"); + else printf(" Not used AVX \n"); + } + return result; } // https://software.intel.com/sites/landingpage/IntrinsicsGuide void gemm_nn(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) + float *A, int lda, + float *B, int ldb, + float *C, int ldc) { - int i, j, k; - if (is_fma_avx() == 1) { // AVX - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - float A_PART = ALPHA*A[i*lda + k]; - __m256 a256, b256, c256, result256; // AVX - a256 = _mm256_set1_ps(A_PART); - for (j = 0; j < N - 8; j += 8) { - b256 = _mm256_loadu_ps(&B[k*ldb + j]); - c256 = _mm256_loadu_ps(&C[i*ldc + j]); - // FMA - Intel Haswell (2013), AMD Piledriver (2012) - //result256 = _mm256_fmadd_ps(a256, b256, c256); - result256 = _mm256_mul_ps(a256, b256); - result256 = _mm256_add_ps(result256, c256); - _mm256_storeu_ps(&C[i*ldc + j], result256); - } + int i, j, k; + if (is_fma_avx() == 1) { // AVX + for (i = 0; i < M; ++i) { + for (k = 0; k < K; ++k) { + float A_PART = ALPHA*A[i*lda + k]; + __m256 a256, b256, c256, result256; // AVX + a256 = _mm256_set1_ps(A_PART); + for (j = 0; j < N - 8; j += 8) { + b256 = _mm256_loadu_ps(&B[k*ldb + j]); + c256 = _mm256_loadu_ps(&C[i*ldc + j]); + // FMA - Intel Haswell (2013), AMD Piledriver (2012) + //result256 = _mm256_fmadd_ps(a256, b256, c256); + result256 = _mm256_mul_ps(a256, b256); + result256 = _mm256_add_ps(result256, c256); + _mm256_storeu_ps(&C[i*ldc + j], result256); + } - int prev_end = (N % 8 == 0) ? (N - 8) : (N / 8) * 8; - for (j = prev_end; j < N; ++j) - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - } - } - else { - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - register float A_PART = ALPHA*A[i*lda + k]; - for (j = 0; j < N; ++j) { - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - /* // SSE - __m128 a128, b128, c128, result128; // SSE - a128 = _mm_set1_ps(A_PART); - for (j = 0; j < N - 4; j += 4) { - b128 = _mm_loadu_ps(&B[k*ldb + j]); - c128 = _mm_loadu_ps(&C[i*ldc + j]); - //result128 = _mm_fmadd_ps(a128, b128, c128); - result128 = _mm_mul_ps(a128, b128); - result128 = _mm_add_ps(result128, c128); - _mm_storeu_ps(&C[i*ldc + j], result128); - } + int prev_end = (N % 8 == 0) ? (N - 8) : (N / 8) * 8; + for (j = prev_end; j < N; ++j) + C[i*ldc + j] += A_PART*B[k*ldb + j]; + } + } + } + else { + for (i = 0; i < M; ++i) { + for (k = 0; k < K; ++k) { + register float A_PART = ALPHA*A[i*lda + k]; + for (j = 0; j < N; ++j) { + C[i*ldc + j] += A_PART*B[k*ldb + j]; + } + /* // SSE + __m128 a128, b128, c128, result128; // SSE + a128 = _mm_set1_ps(A_PART); + for (j = 0; j < N - 4; j += 4) { + b128 = _mm_loadu_ps(&B[k*ldb + j]); + c128 = _mm_loadu_ps(&C[i*ldc + j]); + //result128 = _mm_fmadd_ps(a128, b128, c128); + result128 = _mm_mul_ps(a128, b128); + result128 = _mm_add_ps(result128, c128); + _mm_storeu_ps(&C[i*ldc + j], result128); + } - int prev_end = (N % 4 == 0) ? (N - 4) : (N / 4) * 4; - for (j = prev_end; j < N; ++j){ - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - */ - } - } - } + int prev_end = (N % 4 == 0) ? (N - 4) : (N / 4) * 4; + for (j = prev_end; j < N; ++j){ + C[i*ldc + j] += A_PART*B[k*ldb + j]; + } + */ + } + } + } } #else void gemm_nn(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) + float *A, int lda, + float *B, int ldb, + float *C, int ldc) { - int i, j, k; - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - register float A_PART = ALPHA*A[i*lda + k]; - for (j = 0; j < N; ++j) { - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - } - } + int i, j, k; + for (i = 0; i < M; ++i) { + for (k = 0; k < K; ++k) { + register float A_PART = ALPHA*A[i*lda + k]; + for (j = 0; j < N; ++j) { + C[i*ldc + j] += A_PART*B[k*ldb + j]; + } + } + } } -#endif // __x86_64 +#endif // __x86_64 void gemm_nt(int M, int N, int K, float ALPHA, float *A, int lda, @@ -282,18 +282,18 @@ void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, } } - int t; - #pragma omp parallel for - for (t = 0; t < M; ++t) { - if (!TA && !TB) - gemm_nn(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc); - else if (TA && !TB) - gemm_tn(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc); - else if (!TA && TB) - gemm_nt(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc); - else - gemm_tt(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc); - } + int t; + #pragma omp parallel for + for (t = 0; t < M; ++t) { + if (!TA && !TB) + gemm_nn(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc); + else if (TA && !TB) + gemm_tn(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc); + else if (!TA && TB) + gemm_nt(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc); + else + gemm_tt(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc); + } } #ifdef GPU @@ -307,7 +307,7 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA, float *C_gpu, int ldc) { cublasHandle_t handle = blas_handle(); - cudaError_t stream_status = cublasSetStream(handle, get_cuda_stream()); + cudaError_t stream_status = cublasSetStream(handle, get_cuda_stream()); cudaError_t status = cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); check_error(status); diff --git a/src/gettimeofday.c b/src/gettimeofday.c index c8612bf0..7df6480d 100644 --- a/src/gettimeofday.c +++ b/src/gettimeofday.c @@ -1,5 +1,5 @@ #include "gettimeofday.h" - + int gettimeofday(struct timeval *tv, struct timezone *tz) { FILETIME ft; diff --git a/src/http_stream.cpp b/src/http_stream.cpp index b122f5b7..4179ef5a 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -7,7 +7,7 @@ // // socket related abstractions: // -#ifdef _WIN32 +#ifdef _WIN32 #pragma comment(lib, "ws2_32.lib") #include #include @@ -16,8 +16,8 @@ #define ADDRPOINTER int* struct _INIT_W32DATA { - WSADATA w; - _INIT_W32DATA() { WSAStartup(MAKEWORD(2, 1), &w); } + WSADATA w; + _INIT_W32DATA() { WSAStartup(MAKEWORD(2, 1), &w); } } _init_once; #else /* ! win32 */ #include @@ -58,274 +58,274 @@ using namespace cv; class MJPGWriter { - SOCKET sock; - SOCKET maxfd; - fd_set master; - int timeout; // master sock timeout, shutdown after timeout millis. - int quality; // jpeg compression [1..100] + SOCKET sock; + SOCKET maxfd; + fd_set master; + int timeout; // master sock timeout, shutdown after timeout millis. + int quality; // jpeg compression [1..100] - int _write(int sock, char const*const s, int len) - { - if (len < 1) { len = strlen(s); } - return ::send(sock, s, len, 0); - } + int _write(int sock, char const*const s, int len) + { + if (len < 1) { len = strlen(s); } + return ::send(sock, s, len, 0); + } public: - MJPGWriter(int port = 0, int _timeout = 200000, int _quality = 30) - : sock(INVALID_SOCKET) - , timeout(_timeout) - , quality(_quality) - { - FD_ZERO(&master); - if (port) - open(port); - } + MJPGWriter(int port = 0, int _timeout = 200000, int _quality = 30) + : sock(INVALID_SOCKET) + , timeout(_timeout) + , quality(_quality) + { + FD_ZERO(&master); + if (port) + open(port); + } - ~MJPGWriter() - { - release(); - } + ~MJPGWriter() + { + release(); + } - bool release() - { - if (sock != INVALID_SOCKET) - ::shutdown(sock, 2); - sock = (INVALID_SOCKET); - return false; - } + bool release() + { + if (sock != INVALID_SOCKET) + ::shutdown(sock, 2); + sock = (INVALID_SOCKET); + return false; + } - bool open(int port) - { - sock = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + bool open(int port) + { + sock = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - SOCKADDR_IN address; - address.sin_addr.s_addr = INADDR_ANY; - address.sin_family = AF_INET; - address.sin_port = htons(port); // ::htons(port); - if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) - { - cerr << "error : couldn't bind sock " << sock << " to port " << port << "!" << endl; - return release(); - } - if (::listen(sock, 10) == SOCKET_ERROR) - { - cerr << "error : couldn't listen on sock " << sock << " on port " << port << " !" << endl; - return release(); - } - FD_ZERO(&master); - FD_SET(sock, &master); - maxfd = sock; - return true; - } + SOCKADDR_IN address; + address.sin_addr.s_addr = INADDR_ANY; + address.sin_family = AF_INET; + address.sin_port = htons(port); // ::htons(port); + if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) + { + cerr << "error : couldn't bind sock " << sock << " to port " << port << "!" << endl; + return release(); + } + if (::listen(sock, 10) == SOCKET_ERROR) + { + cerr << "error : couldn't listen on sock " << sock << " on port " << port << " !" << endl; + return release(); + } + FD_ZERO(&master); + FD_SET(sock, &master); + maxfd = sock; + return true; + } - bool isOpened() - { - return sock != INVALID_SOCKET; - } + bool isOpened() + { + return sock != INVALID_SOCKET; + } - bool write(const Mat & frame) - { - fd_set rread = master; - struct timeval to = { 0,timeout }; - if (::select(maxfd+1, &rread, NULL, NULL, &to) <= 0) - return true; // nothing broken, there's just noone listening + bool write(const Mat & frame) + { + fd_set rread = master; + struct timeval to = { 0,timeout }; + if (::select(maxfd+1, &rread, NULL, NULL, &to) <= 0) + return true; // nothing broken, there's just noone listening - std::vector outbuf; - std::vector params; - params.push_back(IMWRITE_JPEG_QUALITY); - params.push_back(quality); - cv::imencode(".jpg", frame, outbuf, params); - size_t outlen = outbuf.size(); + std::vector outbuf; + std::vector params; + params.push_back(IMWRITE_JPEG_QUALITY); + params.push_back(quality); + cv::imencode(".jpg", frame, outbuf, params); + size_t outlen = outbuf.size(); -#ifdef _WIN32 - for (unsigned i = 0; iclient ? maxfd : client); - FD_SET(client, &master); - _write(client, "HTTP/1.0 200 OK\r\n", 0); - _write(client, - "Server: Mozarella/2.2\r\n" - "Accept-Range: bytes\r\n" - "Connection: close\r\n" - "Max-Age: 0\r\n" - "Expires: 0\r\n" - "Cache-Control: no-cache, private\r\n" - "Pragma: no-cache\r\n" - "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n" - "\r\n", 0); - cerr << "new client " << client << endl; - } - else // existing client, just stream pix - { - char head[400]; - sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); - _write(s, head, 0); - int n = _write(s, (char*)(&outbuf[0]), outlen); - //cerr << "known client " << s << " " << n << endl; - if (n < outlen) - { - cerr << "kill client " << s << endl; - ::shutdown(s, 2); - FD_CLR(s, &master); - } - } - } - return true; - } +#ifdef _WIN32 + for (unsigned i = 0; iclient ? maxfd : client); + FD_SET(client, &master); + _write(client, "HTTP/1.0 200 OK\r\n", 0); + _write(client, + "Server: Mozarella/2.2\r\n" + "Accept-Range: bytes\r\n" + "Connection: close\r\n" + "Max-Age: 0\r\n" + "Expires: 0\r\n" + "Cache-Control: no-cache, private\r\n" + "Pragma: no-cache\r\n" + "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n" + "\r\n", 0); + cerr << "new client " << client << endl; + } + else // existing client, just stream pix + { + char head[400]; + sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); + _write(s, head, 0); + int n = _write(s, (char*)(&outbuf[0]), outlen); + //cerr << "known client " << s << " " << n << endl; + if (n < outlen) + { + cerr << "kill client " << s << endl; + ::shutdown(s, 2); + FD_CLR(s, &master); + } + } + } + return true; + } }; // ---------------------------------------- void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) { - static MJPGWriter wri(port, timeout, quality); - cv::Mat mat = cv::cvarrToMat(ipl); - wri.write(mat); - std::cout << " MJPEG-stream sent. \n"; + static MJPGWriter wri(port, timeout, quality); + cv::Mat mat = cv::cvarrToMat(ipl); + wri.write(mat); + std::cout << " MJPEG-stream sent. \n"; } // ---------------------------------------- CvCapture* get_capture_video_stream(char *path) { - CvCapture* cap = NULL; - try { - cap = (CvCapture*)new cv::VideoCapture(path); - } - catch (...) { - std::cout << " Error: video-stream " << path << " can't be opened! \n"; - } - return cap; + CvCapture* cap = NULL; + try { + cap = (CvCapture*)new cv::VideoCapture(path); + } + catch (...) { + std::cout << " Error: video-stream " << path << " can't be opened! \n"; + } + return cap; } // ---------------------------------------- CvCapture* get_capture_webcam(int index) { - CvCapture* cap = NULL; - try { - cap = (CvCapture*)new cv::VideoCapture(index); - //((cv::VideoCapture*)cap)->set(CV_CAP_PROP_FRAME_WIDTH, 1280); - //((cv::VideoCapture*)cap)->set(CV_CAP_PROP_FRAME_HEIGHT, 960); - } - catch (...) { - std::cout << " Error: Web-camera " << index << " can't be opened! \n"; - } - return cap; + CvCapture* cap = NULL; + try { + cap = (CvCapture*)new cv::VideoCapture(index); + //((cv::VideoCapture*)cap)->set(CV_CAP_PROP_FRAME_WIDTH, 1280); + //((cv::VideoCapture*)cap)->set(CV_CAP_PROP_FRAME_HEIGHT, 960); + } + catch (...) { + std::cout << " Error: Web-camera " << index << " can't be opened! \n"; + } + return cap; } // ---------------------------------------- IplImage* get_webcam_frame(CvCapture *cap) { - IplImage* src = NULL; - try { - cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; - cv::Mat frame; - if (cpp_cap.isOpened()) - { - cpp_cap >> frame; - IplImage tmp = frame; - src = cvCloneImage(&tmp); - } - else { - std::cout << " Video-stream stoped! \n"; - } - } - catch (...) { - std::cout << " Video-stream stoped! \n"; - } - return src; + IplImage* src = NULL; + try { + cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; + cv::Mat frame; + if (cpp_cap.isOpened()) + { + cpp_cap >> frame; + IplImage tmp = frame; + src = cvCloneImage(&tmp); + } + else { + std::cout << " Video-stream stoped! \n"; + } + } + catch (...) { + std::cout << " Video-stream stoped! \n"; + } + return src; } int get_stream_fps_cpp(CvCapture *cap) { - int fps = 25; - try { - cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; -#ifndef CV_VERSION_EPOCH // OpenCV 3.x - fps = cpp_cap.get(CAP_PROP_FPS); -#else // OpenCV 2.x - fps = cpp_cap.get(CV_CAP_PROP_FPS); -#endif - } - catch (...) { - std::cout << " Can't get FPS of source videofile. For output video FPS = 25 by default. \n"; - } - return fps; + int fps = 25; + try { + cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; +#ifndef CV_VERSION_EPOCH // OpenCV 3.x + fps = cpp_cap.get(CAP_PROP_FPS); +#else // OpenCV 2.x + fps = cpp_cap.get(CV_CAP_PROP_FPS); +#endif + } + catch (...) { + std::cout << " Can't get FPS of source videofile. For output video FPS = 25 by default. \n"; + } + return fps; } // ---------------------------------------- extern "C" { - image ipl_to_image(IplImage* src); // image.c + image ipl_to_image(IplImage* src); // image.c } image image_data_augmentation(IplImage* ipl, int w, int h, - int pleft, int ptop, int swidth, int sheight, int flip, - float jitter, float dhue, float dsat, float dexp) + int pleft, int ptop, int swidth, int sheight, int flip, + float jitter, float dhue, float dsat, float dexp) { - cv::Mat img = cv::cvarrToMat(ipl); + cv::Mat img = cv::cvarrToMat(ipl); - // crop - cv::Rect src_rect(pleft, ptop, swidth, sheight); - cv::Rect img_rect(cv::Point2i(0, 0), img.size()); - cv::Rect new_src_rect = src_rect & img_rect; + // crop + cv::Rect src_rect(pleft, ptop, swidth, sheight); + cv::Rect img_rect(cv::Point2i(0, 0), img.size()); + cv::Rect new_src_rect = src_rect & img_rect; - cv::Rect dst_rect(cv::Point2i(std::max(0, -pleft), std::max(0, -ptop)), new_src_rect.size()); + cv::Rect dst_rect(cv::Point2i(std::max(0, -pleft), std::max(0, -ptop)), new_src_rect.size()); - cv::Mat cropped(cv::Size(src_rect.width, src_rect.height), img.type()); - cropped.setTo(cv::Scalar::all(0)); + cv::Mat cropped(cv::Size(src_rect.width, src_rect.height), img.type()); + cropped.setTo(cv::Scalar::all(0)); - img(new_src_rect).copyTo(cropped(dst_rect)); + img(new_src_rect).copyTo(cropped(dst_rect)); - // resize - cv::Mat sized; - cv::resize(cropped, sized, cv::Size(w, h), 0, 0, INTER_LINEAR); + // resize + cv::Mat sized; + cv::resize(cropped, sized, cv::Size(w, h), 0, 0, INTER_LINEAR); - // flip - if (flip) { - cv::flip(sized, cropped, 1); // 0 - x-axis, 1 - y-axis, -1 - both axes (x & y) - sized = cropped.clone(); - } + // flip + if (flip) { + cv::flip(sized, cropped, 1); // 0 - x-axis, 1 - y-axis, -1 - both axes (x & y) + sized = cropped.clone(); + } - // HSV augmentation - // CV_BGR2HSV, CV_RGB2HSV, CV_HSV2BGR, CV_HSV2RGB - if (ipl->nChannels >= 3) - { - cv::Mat hsv_src; - cvtColor(sized, hsv_src, CV_BGR2HSV); // also BGR -> RGB - - std::vector hsv; - cv::split(hsv_src, hsv); + // HSV augmentation + // CV_BGR2HSV, CV_RGB2HSV, CV_HSV2BGR, CV_HSV2RGB + if (ipl->nChannels >= 3) + { + cv::Mat hsv_src; + cvtColor(sized, hsv_src, CV_BGR2HSV); // also BGR -> RGB - hsv[1] *= dsat; - hsv[2] *= dexp; - hsv[0] += 179 * dhue; + std::vector hsv; + cv::split(hsv_src, hsv); - cv::merge(hsv, hsv_src); + hsv[1] *= dsat; + hsv[2] *= dexp; + hsv[0] += 179 * dhue; - cvtColor(hsv_src, sized, CV_HSV2RGB); // now RGB instead of BGR - } - else - { - sized *= dexp; - } + cv::merge(hsv, hsv_src); - // Mat -> IplImage -> image - IplImage src = sized; - image out = ipl_to_image(&src); + cvtColor(hsv_src, sized, CV_HSV2RGB); // now RGB instead of BGR + } + else + { + sized *= dexp; + } - return out; + // Mat -> IplImage -> image + IplImage src = sized; + image out = ipl_to_image(&src); + + return out; } -#endif // OPENCV +#endif // OPENCV diff --git a/src/http_stream.h b/src/http_stream.h index bff18fb7..298d3398 100644 --- a/src/http_stream.h +++ b/src/http_stream.h @@ -14,8 +14,8 @@ IplImage* get_webcam_frame(CvCapture *cap); int get_stream_fps_cpp(CvCapture *cap); image image_data_augmentation(IplImage* ipl, int w, int h, - int pleft, int ptop, int swidth, int sheight, int flip, - float jitter, float dhue, float dsat, float dexp); + int pleft, int ptop, int swidth, int sheight, int flip, + float jitter, float dhue, float dsat, float dexp); #ifdef __cplusplus } diff --git a/src/image.c b/src/image.c index a2dc3cac..75259e8f 100644 --- a/src/image.c +++ b/src/image.c @@ -40,31 +40,31 @@ float get_color(int c, int x, int max) static float get_pixel(image m, int x, int y, int c) { - assert(x < m.w && y < m.h && c < m.c); - return m.data[c*m.h*m.w + y*m.w + x]; + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; } static float get_pixel_extend(image m, int x, int y, int c) { - if (x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; - /* - if(x < 0) x = 0; - if(x >= m.w) x = m.w-1; - if(y < 0) y = 0; - if(y >= m.h) y = m.h-1; - */ - if (c < 0 || c >= m.c) return 0; - return get_pixel(m, x, y, c); + if (x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; + /* + if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if (c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); } static void set_pixel(image m, int x, int y, int c, float val) { - if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; - assert(x < m.w && y < m.h && c < m.c); - m.data[c*m.h*m.w + y*m.w + x] = val; + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; } static void add_pixel(image m, int x, int y, int c, float val) { - assert(x < m.w && y < m.h && c < m.c); - m.data[c*m.h*m.w + y*m.w + x] += val; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; } void composite_image(image source, image dest, int dx, int dy) @@ -125,19 +125,19 @@ image get_label(image **characters, char *string, int size) image get_label_v3(image **characters, char *string, int size) { - size = size / 10; - if (size > 7) size = 7; - image label = make_empty_image(0, 0, 0); - while (*string) { - image l = characters[size][(int)*string]; - image n = tile_images(label, l, -size - 1 + (size + 1) / 2); - free_image(label); - label = n; - ++string; - } - image b = border_image(label, label.h*.25); - free_image(label); - return b; + size = size / 10; + if (size > 7) size = 7; + image label = make_empty_image(0, 0, 0); + while (*string) { + image l = characters[size][(int)*string]; + image n = tile_images(label, l, -size - 1 + (size + 1) / 2); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; } void draw_label(image a, int r, int c, image label, const float *rgb) @@ -235,143 +235,143 @@ image **load_alphabet() // Creates array of detections with prob > thresh and fills best_class for them detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num) { - int selected_num = 0; - detection_with_class* result_arr = calloc(dets_num, sizeof(detection_with_class)); - int i; - for (i = 0; i < dets_num; ++i) { - int best_class = -1; - float best_class_prob = thresh; - int j; - for (j = 0; j < dets[i].classes; ++j) { - if (dets[i].prob[j] > best_class_prob ) { - best_class = j; - best_class_prob = dets[i].prob[j]; - } - } - if (best_class >= 0) { - result_arr[selected_num].det = dets[i]; - result_arr[selected_num].best_class = best_class; - ++selected_num; - } - } - if (selected_detections_num) - *selected_detections_num = selected_num; - return result_arr; + int selected_num = 0; + detection_with_class* result_arr = calloc(dets_num, sizeof(detection_with_class)); + int i; + for (i = 0; i < dets_num; ++i) { + int best_class = -1; + float best_class_prob = thresh; + int j; + for (j = 0; j < dets[i].classes; ++j) { + if (dets[i].prob[j] > best_class_prob ) { + best_class = j; + best_class_prob = dets[i].prob[j]; + } + } + if (best_class >= 0) { + result_arr[selected_num].det = dets[i]; + result_arr[selected_num].best_class = best_class; + ++selected_num; + } + } + if (selected_detections_num) + *selected_detections_num = selected_num; + return result_arr; } // compare to sort detection** by bbox.x int compare_by_lefts(const void *a_ptr, const void *b_ptr) { - const detection_with_class* a = (detection_with_class*)a_ptr; - const detection_with_class* b = (detection_with_class*)b_ptr; - const float delta = (a->det.bbox.x - a->det.bbox.w/2) - (b->det.bbox.x - b->det.bbox.w/2); - return delta < 0 ? -1 : delta > 0 ? 1 : 0; + const detection_with_class* a = (detection_with_class*)a_ptr; + const detection_with_class* b = (detection_with_class*)b_ptr; + const float delta = (a->det.bbox.x - a->det.bbox.w/2) - (b->det.bbox.x - b->det.bbox.w/2); + return delta < 0 ? -1 : delta > 0 ? 1 : 0; } // compare to sort detection** by best_class probability int compare_by_probs(const void *a_ptr, const void *b_ptr) { - const detection_with_class* a = (detection_with_class*)a_ptr; - const detection_with_class* b = (detection_with_class*)b_ptr; - float delta = a->det.prob[a->best_class] - b->det.prob[b->best_class]; - return delta < 0 ? -1 : delta > 0 ? 1 : 0; + const detection_with_class* a = (detection_with_class*)a_ptr; + const detection_with_class* b = (detection_with_class*)b_ptr; + float delta = a->det.prob[a->best_class] - b->det.prob[b->best_class]; + return delta < 0 ? -1 : delta > 0 ? 1 : 0; } void draw_detections_v3(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output) { - int selected_detections_num; - detection_with_class* selected_detections = get_actual_detections(dets, num, thresh, &selected_detections_num); + int selected_detections_num; + detection_with_class* selected_detections = get_actual_detections(dets, num, thresh, &selected_detections_num); - // text output - qsort(selected_detections, selected_detections_num, sizeof(*selected_detections), compare_by_lefts); - int i; - for (i = 0; i < selected_detections_num; ++i) { - const int best_class = selected_detections[i].best_class; - printf("%s: %.0f%%", names[best_class], selected_detections[i].det.prob[best_class] * 100); - if (ext_output) - printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n", - (selected_detections[i].det.bbox.x - selected_detections[i].det.bbox.w / 2)*im.w, - (selected_detections[i].det.bbox.y - selected_detections[i].det.bbox.h / 2)*im.h, - selected_detections[i].det.bbox.w*im.w, selected_detections[i].det.bbox.h*im.h); - else - printf("\n"); - int j; - for (j = 0; j < classes; ++j) { - if (selected_detections[i].det.prob[j] > thresh && j != best_class) { - printf("%s: %.0f%%\n", names[j], selected_detections[i].det.prob[j] * 100); - } - } - } + // text output + qsort(selected_detections, selected_detections_num, sizeof(*selected_detections), compare_by_lefts); + int i; + for (i = 0; i < selected_detections_num; ++i) { + const int best_class = selected_detections[i].best_class; + printf("%s: %.0f%%", names[best_class], selected_detections[i].det.prob[best_class] * 100); + if (ext_output) + printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n", + (selected_detections[i].det.bbox.x - selected_detections[i].det.bbox.w / 2)*im.w, + (selected_detections[i].det.bbox.y - selected_detections[i].det.bbox.h / 2)*im.h, + selected_detections[i].det.bbox.w*im.w, selected_detections[i].det.bbox.h*im.h); + else + printf("\n"); + int j; + for (j = 0; j < classes; ++j) { + if (selected_detections[i].det.prob[j] > thresh && j != best_class) { + printf("%s: %.0f%%\n", names[j], selected_detections[i].det.prob[j] * 100); + } + } + } - // image output - qsort(selected_detections, selected_detections_num, sizeof(*selected_detections), compare_by_probs); - for (i = 0; i < selected_detections_num; ++i) { - int width = im.h * .006; - if (width < 1) - width = 1; + // image output + qsort(selected_detections, selected_detections_num, sizeof(*selected_detections), compare_by_probs); + for (i = 0; i < selected_detections_num; ++i) { + int width = im.h * .006; + if (width < 1) + width = 1; - /* - if(0){ - width = pow(prob, 1./2.)*10+1; - alphabet = 0; - } - */ + /* + if(0){ + width = pow(prob, 1./2.)*10+1; + alphabet = 0; + } + */ - //printf("%d %s: %.0f%%\n", i, names[selected_detections[i].best_class], prob*100); - int offset = selected_detections[i].best_class * 123457 % classes; - float red = get_color(2, offset, classes); - float green = get_color(1, offset, classes); - float blue = get_color(0, offset, classes); - float rgb[3]; + //printf("%d %s: %.0f%%\n", i, names[selected_detections[i].best_class], prob*100); + int offset = selected_detections[i].best_class * 123457 % classes; + float red = get_color(2, offset, classes); + float green = get_color(1, offset, classes); + float blue = get_color(0, offset, classes); + float rgb[3]; - //width = prob*20+2; + //width = prob*20+2; - rgb[0] = red; - rgb[1] = green; - rgb[2] = blue; - box b = selected_detections[i].det.bbox; - //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = selected_detections[i].det.bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); - int left = (b.x - b.w / 2.)*im.w; - int right = (b.x + b.w / 2.)*im.w; - int top = (b.y - b.h / 2.)*im.h; - int bot = (b.y + b.h / 2.)*im.h; + int left = (b.x - b.w / 2.)*im.w; + int right = (b.x + b.w / 2.)*im.w; + int top = (b.y - b.h / 2.)*im.h; + int bot = (b.y + b.h / 2.)*im.h; - if (left < 0) left = 0; - if (right > im.w - 1) right = im.w - 1; - if (top < 0) top = 0; - if (bot > im.h - 1) bot = im.h - 1; + if (left < 0) left = 0; + if (right > im.w - 1) right = im.w - 1; + if (top < 0) top = 0; + if (bot > im.h - 1) bot = im.h - 1; - //int b_x_center = (left + right) / 2; - //int b_y_center = (top + bot) / 2; - //int b_width = right - left; - //int b_height = bot - top; - //sprintf(labelstr, "%d x %d - w: %d, h: %d", b_x_center, b_y_center, b_width, b_height); + //int b_x_center = (left + right) / 2; + //int b_y_center = (top + bot) / 2; + //int b_width = right - left; + //int b_height = bot - top; + //sprintf(labelstr, "%d x %d - w: %d, h: %d", b_x_center, b_y_center, b_width, b_height); - draw_box_width(im, left, top, right, bot, width, red, green, blue); - if (alphabet) { - char labelstr[4096] = { 0 }; - strcat(labelstr, names[selected_detections[i].best_class]); - int j; - for (j = 0; j < classes; ++j) { - if (selected_detections[i].det.prob[j] > thresh && j != selected_detections[i].best_class) { - strcat(labelstr, ", "); - strcat(labelstr, names[j]); - } - } - image label = get_label_v3(alphabet, labelstr, (im.h*.03)); - draw_label(im, top + width, left, label, rgb); - free_image(label); - } - if (selected_detections[i].det.mask) { - image mask = float_to_image(14, 14, 1, selected_detections[i].det.mask); - image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); - image tmask = threshold_image(resized_mask, .5); - embed_image(tmask, im, left, top); - free_image(mask); - free_image(resized_mask); - free_image(tmask); - } - } - free(selected_detections); + draw_box_width(im, left, top, right, bot, width, red, green, blue); + if (alphabet) { + char labelstr[4096] = { 0 }; + strcat(labelstr, names[selected_detections[i].best_class]); + int j; + for (j = 0; j < classes; ++j) { + if (selected_detections[i].det.prob[j] > thresh && j != selected_detections[i].best_class) { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + } + image label = get_label_v3(alphabet, labelstr, (im.h*.03)); + draw_label(im, top + width, left, label, rgb); + free_image(label); + } + if (selected_detections[i].det.mask) { + image mask = float_to_image(14, 14, 1, selected_detections[i].det.mask); + image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); + image tmask = threshold_image(resized_mask, .5); + embed_image(tmask, im, left, top); + free_image(mask); + free_image(resized_mask); + free_image(tmask); + } + } + free(selected_detections); } void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes) @@ -383,13 +383,13 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, float prob = probs[i][class_id]; if(prob > thresh){ - //// for comparison with OpenCV version of DNN Darknet Yolo v2 - //printf("\n %f, %f, %f, %f, ", boxes[i].x, boxes[i].y, boxes[i].w, boxes[i].h); - // int k; - //for (k = 0; k < classes; ++k) { - // printf("%f, ", probs[i][k]); - //} - //printf("\n"); + //// for comparison with OpenCV version of DNN Darknet Yolo v2 + //printf("\n %f, %f, %f, %f, ", boxes[i].x, boxes[i].y, boxes[i].w, boxes[i].h); + // int k; + //for (k = 0; k < classes; ++k) { + // printf("%f, ", probs[i][k]); + //} + //printf("\n"); int width = im.h * .012; @@ -420,12 +420,12 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, if(right > im.w-1) right = im.w-1; if(top < 0) top = 0; if(bot > im.h-1) bot = im.h-1; - printf("%s: %.0f%%", names[class_id], prob * 100); - - //printf(" - id: %d, x_center: %d, y_center: %d, width: %d, height: %d", - // class_id, (right + left) / 2, (bot - top) / 2, right - left, bot - top); + printf("%s: %.0f%%", names[class_id], prob * 100); + + //printf(" - id: %d, x_center: %d, y_center: %d, width: %d, height: %d", + // class_id, (right + left) / 2, (bot - top) / 2, right - left, bot - top); - printf("\n"); + printf("\n"); draw_box_width(im, left, top, right, bot, width, red, green, blue); if (alphabet) { image label = get_label(alphabet, names[class_id], (im.h*.03)/10); @@ -439,257 +439,257 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output) { - int i, j; - if (!show_img) return; - static int frame_id = 0; - frame_id++; + int i, j; + if (!show_img) return; + static int frame_id = 0; + frame_id++; - for (i = 0; i < num; ++i) { - char labelstr[4096] = { 0 }; - int class_id = -1; - for (j = 0; j < classes; ++j) { - if (dets[i].prob[j] > thresh) { - if (class_id < 0) { - strcat(labelstr, names[j]); - class_id = j; - } - else { - strcat(labelstr, ", "); - strcat(labelstr, names[j]); - } - printf("%s: %.0f%% ", names[j], dets[i].prob[j] * 100); - } - } - if (class_id >= 0) { - int width = show_img->height * .006; + for (i = 0; i < num; ++i) { + char labelstr[4096] = { 0 }; + int class_id = -1; + for (j = 0; j < classes; ++j) { + if (dets[i].prob[j] > thresh) { + if (class_id < 0) { + strcat(labelstr, names[j]); + class_id = j; + } + else { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + printf("%s: %.0f%% ", names[j], dets[i].prob[j] * 100); + } + } + if (class_id >= 0) { + int width = show_img->height * .006; - //if(0){ - //width = pow(prob, 1./2.)*10+1; - //alphabet = 0; - //} + //if(0){ + //width = pow(prob, 1./2.)*10+1; + //alphabet = 0; + //} - //printf("%d %s: %.0f%%\n", i, names[class_id], prob*100); - int offset = class_id * 123457 % classes; - float red = get_color(2, offset, classes); - float green = get_color(1, offset, classes); - float blue = get_color(0, offset, classes); - float rgb[3]; + //printf("%d %s: %.0f%%\n", i, names[class_id], prob*100); + int offset = class_id * 123457 % classes; + float red = get_color(2, offset, classes); + float green = get_color(1, offset, classes); + float blue = get_color(0, offset, classes); + float rgb[3]; - //width = prob*20+2; + //width = prob*20+2; - rgb[0] = red; - rgb[1] = green; - rgb[2] = blue; - box b = dets[i].bbox; - //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); - int left = (b.x - b.w / 2.)*show_img->width; - int right = (b.x + b.w / 2.)*show_img->width; - int top = (b.y - b.h / 2.)*show_img->height; - int bot = (b.y + b.h / 2.)*show_img->height; + int left = (b.x - b.w / 2.)*show_img->width; + int right = (b.x + b.w / 2.)*show_img->width; + int top = (b.y - b.h / 2.)*show_img->height; + int bot = (b.y + b.h / 2.)*show_img->height; - if (left < 0) left = 0; - if (right > show_img->width - 1) right = show_img->width - 1; - if (top < 0) top = 0; - if (bot > show_img->height - 1) bot = show_img->height - 1; + if (left < 0) left = 0; + if (right > show_img->width - 1) right = show_img->width - 1; + if (top < 0) top = 0; + if (bot > show_img->height - 1) bot = show_img->height - 1; - //int b_x_center = (left + right) / 2; - //int b_y_center = (top + bot) / 2; - //int b_width = right - left; - //int b_height = bot - top; - //sprintf(labelstr, "%d x %d - w: %d, h: %d", b_x_center, b_y_center, b_width, b_height); + //int b_x_center = (left + right) / 2; + //int b_y_center = (top + bot) / 2; + //int b_width = right - left; + //int b_height = bot - top; + //sprintf(labelstr, "%d x %d - w: %d, h: %d", b_x_center, b_y_center, b_width, b_height); - float const font_size = show_img->height / 1000.F; - CvPoint pt1, pt2, pt_text, pt_text_bg1, pt_text_bg2; - pt1.x = left; - pt1.y = top; - pt2.x = right; - pt2.y = bot; - pt_text.x = left; - pt_text.y = top - 12; - pt_text_bg1.x = left; - pt_text_bg1.y = top - (10 + 25 * font_size); - pt_text_bg2.x = right; - pt_text_bg2.y = top; - CvScalar color; - color.val[0] = red * 256; - color.val[1] = green * 256; - color.val[2] = blue * 256; + float const font_size = show_img->height / 1000.F; + CvPoint pt1, pt2, pt_text, pt_text_bg1, pt_text_bg2; + pt1.x = left; + pt1.y = top; + pt2.x = right; + pt2.y = bot; + pt_text.x = left; + pt_text.y = top - 12; + pt_text_bg1.x = left; + pt_text_bg1.y = top - (10 + 25 * font_size); + pt_text_bg2.x = right; + pt_text_bg2.y = top; + CvScalar color; + color.val[0] = red * 256; + color.val[1] = green * 256; + color.val[2] = blue * 256; - // you should create directory: result_img - //static int copied_frame_id = -1; - //static IplImage* copy_img = NULL; - //if (copied_frame_id != frame_id) { - // copied_frame_id = frame_id; - // if(copy_img == NULL) copy_img = cvCreateImage(cvSize(show_img->width, show_img->height), show_img->depth, show_img->nChannels); - // cvCopy(show_img, copy_img, 0); - //} - //static int img_id = 0; - //img_id++; - //char image_name[1024]; - //sprintf(image_name, "result_img/img_%d_%d_%d.jpg", frame_id, img_id, class_id); - //CvRect rect = cvRect(pt1.x, pt1.y, pt2.x - pt1.x, pt2.y - pt1.y); - //cvSetImageROI(copy_img, rect); - //cvSaveImage(image_name, copy_img, 0); - //cvResetImageROI(copy_img); + // you should create directory: result_img + //static int copied_frame_id = -1; + //static IplImage* copy_img = NULL; + //if (copied_frame_id != frame_id) { + // copied_frame_id = frame_id; + // if(copy_img == NULL) copy_img = cvCreateImage(cvSize(show_img->width, show_img->height), show_img->depth, show_img->nChannels); + // cvCopy(show_img, copy_img, 0); + //} + //static int img_id = 0; + //img_id++; + //char image_name[1024]; + //sprintf(image_name, "result_img/img_%d_%d_%d.jpg", frame_id, img_id, class_id); + //CvRect rect = cvRect(pt1.x, pt1.y, pt2.x - pt1.x, pt2.y - pt1.y); + //cvSetImageROI(copy_img, rect); + //cvSaveImage(image_name, copy_img, 0); + //cvResetImageROI(copy_img); - cvRectangle(show_img, pt1, pt2, color, width, 8, 0); - if (ext_output) - printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n", - (float)left, (float)top, b.w*show_img->width, b.h*show_img->height); - else - printf("\n"); - cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, width, 8, 0); - cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, CV_FILLED, 8, 0); // filled - CvScalar black_color; - black_color.val[0] = 0; - CvFont font; - cvInitFont(&font, CV_FONT_HERSHEY_SIMPLEX, font_size, font_size, 0, font_size * 3, 8); - cvPutText(show_img, labelstr, pt_text, &font, black_color); - } - } + cvRectangle(show_img, pt1, pt2, color, width, 8, 0); + if (ext_output) + printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n", + (float)left, (float)top, b.w*show_img->width, b.h*show_img->height); + else + printf("\n"); + cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, width, 8, 0); + cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, CV_FILLED, 8, 0); // filled + CvScalar black_color; + black_color.val[0] = 0; + CvFont font; + cvInitFont(&font, CV_FONT_HERSHEY_SIMPLEX, font_size, font_size, 0, font_size * 3, 8); + cvPutText(show_img, labelstr, pt_text, &font, black_color); + } + } } void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes) { - int i; + int i; - for (i = 0; i < num; ++i) { - int class_id = max_index(probs[i], classes); - float prob = probs[i][class_id]; - if (prob > thresh) { + for (i = 0; i < num; ++i) { + int class_id = max_index(probs[i], classes); + float prob = probs[i][class_id]; + if (prob > thresh) { - int width = show_img->height * .012; + int width = show_img->height * .012; - if (0) { - width = pow(prob, 1. / 2.) * 10 + 1; - alphabet = 0; - } + if (0) { + width = pow(prob, 1. / 2.) * 10 + 1; + alphabet = 0; + } - printf("%s: %.0f%%\n", names[class_id], prob * 100); - int offset = class_id * 123457 % classes; - float red = get_color(2, offset, classes); - float green = get_color(1, offset, classes); - float blue = get_color(0, offset, classes); - float rgb[3]; + printf("%s: %.0f%%\n", names[class_id], prob * 100); + int offset = class_id * 123457 % classes; + float red = get_color(2, offset, classes); + float green = get_color(1, offset, classes); + float blue = get_color(0, offset, classes); + float rgb[3]; - //width = prob*20+2; + //width = prob*20+2; - rgb[0] = red; - rgb[1] = green; - rgb[2] = blue; - box b = boxes[i]; + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = boxes[i]; - int left = (b.x - b.w / 2.)*show_img->width; - int right = (b.x + b.w / 2.)*show_img->width; - int top = (b.y - b.h / 2.)*show_img->height; - int bot = (b.y + b.h / 2.)*show_img->height; + int left = (b.x - b.w / 2.)*show_img->width; + int right = (b.x + b.w / 2.)*show_img->width; + int top = (b.y - b.h / 2.)*show_img->height; + int bot = (b.y + b.h / 2.)*show_img->height; - if (left < 0) left = 0; - if (right > show_img->width - 1) right = show_img->width - 1; - if (top < 0) top = 0; - if (bot > show_img->height - 1) bot = show_img->height - 1; + if (left < 0) left = 0; + if (right > show_img->width - 1) right = show_img->width - 1; + if (top < 0) top = 0; + if (bot > show_img->height - 1) bot = show_img->height - 1; - float const font_size = show_img->height / 1000.F; - CvPoint pt1, pt2, pt_text, pt_text_bg1, pt_text_bg2; - pt1.x = left; - pt1.y = top; - pt2.x = right; - pt2.y = bot; - pt_text.x = left; - pt_text.y = top - 12; - pt_text_bg1.x = left; - pt_text_bg1.y = top - (10+25*font_size); - pt_text_bg2.x = right; - pt_text_bg2.y = top; - CvScalar color; - color.val[0] = red * 256; - color.val[1] = green * 256; - color.val[2] = blue * 256; + float const font_size = show_img->height / 1000.F; + CvPoint pt1, pt2, pt_text, pt_text_bg1, pt_text_bg2; + pt1.x = left; + pt1.y = top; + pt2.x = right; + pt2.y = bot; + pt_text.x = left; + pt_text.y = top - 12; + pt_text_bg1.x = left; + pt_text_bg1.y = top - (10+25*font_size); + pt_text_bg2.x = right; + pt_text_bg2.y = top; + CvScalar color; + color.val[0] = red * 256; + color.val[1] = green * 256; + color.val[2] = blue * 256; - cvRectangle(show_img, pt1, pt2, color, width, 8, 0); - //printf("left=%d, right=%d, top=%d, bottom=%d, obj_id=%d, obj=%s \n", left, right, top, bot, class_id, names[class_id]); - cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, width, 8, 0); - cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, CV_FILLED, 8, 0); // filled - CvScalar black_color; - black_color.val[0] = 0; - CvFont font; - cvInitFont(&font, CV_FONT_HERSHEY_SIMPLEX, font_size, font_size, 0, font_size * 3, 8); - cvPutText(show_img, names[class_id], pt_text, &font, black_color); - } - } + cvRectangle(show_img, pt1, pt2, color, width, 8, 0); + //printf("left=%d, right=%d, top=%d, bottom=%d, obj_id=%d, obj=%s \n", left, right, top, bot, class_id, names[class_id]); + cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, width, 8, 0); + cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, CV_FILLED, 8, 0); // filled + CvScalar black_color; + black_color.val[0] = 0; + CvFont font; + cvInitFont(&font, CV_FONT_HERSHEY_SIMPLEX, font_size, font_size, 0, font_size * 3, 8); + cvPutText(show_img, names[class_id], pt_text, &font, black_color); + } + } } IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_lines, int img_size) { - int img_offset = 50; - int draw_size = img_size - img_offset; - IplImage* img = cvCreateImage(cvSize(img_size, img_size), 8, 3); - cvSet(img, CV_RGB(255, 255, 255), 0); - CvPoint pt1, pt2, pt_text; - CvFont font; - cvInitFont(&font, CV_FONT_HERSHEY_COMPLEX_SMALL, 0.7, 0.7, 0, 1, CV_AA); - char char_buff[100]; - int i; - // vertical lines - pt1.x = img_offset; pt2.x = img_size, pt_text.x = 10; - for (i = 1; i <= number_of_lines; ++i) { - pt1.y = pt2.y = (float)i * draw_size / number_of_lines; - cvLine(img, pt1, pt2, CV_RGB(224, 224, 224), 1, 8, 0); - if (i % 10 == 0) { - sprintf(char_buff, "%2.1f", max_img_loss*(number_of_lines - i) / number_of_lines); - pt_text.y = pt1.y + 5; - cvPutText(img, char_buff, pt_text, &font, CV_RGB(0, 0, 0)); - cvLine(img, pt1, pt2, CV_RGB(128, 128, 128), 1, 8, 0); - } - } - // horizontal lines - pt1.y = draw_size; pt2.y = 0, pt_text.y = draw_size + 15; - for (i = 0; i <= number_of_lines; ++i) { - pt1.x = pt2.x = img_offset + (float)i * draw_size / number_of_lines; - cvLine(img, pt1, pt2, CV_RGB(224, 224, 224), 1, 8, 0); - if (i % 10 == 0) { - sprintf(char_buff, "%d", max_batches * i / number_of_lines); - pt_text.x = pt1.x - 20; - cvPutText(img, char_buff, pt_text, &font, CV_RGB(0, 0, 0)); - cvLine(img, pt1, pt2, CV_RGB(128, 128, 128), 1, 8, 0); - } - } - cvPutText(img, "Iteration number", cvPoint(draw_size / 2, img_size - 10), &font, CV_RGB(0, 0, 0)); - cvPutText(img, "Press 's' to save: chart.jpg", cvPoint(5, img_size - 10), &font, CV_RGB(0, 0, 0)); - printf(" If error occurs - run training with flag: -dont_show \n"); - cvNamedWindow("average loss", CV_WINDOW_NORMAL); - cvMoveWindow("average loss", 0, 0); - cvResizeWindow("average loss", img_size, img_size); - cvShowImage("average loss", img); - cvWaitKey(20); - return img; + int img_offset = 50; + int draw_size = img_size - img_offset; + IplImage* img = cvCreateImage(cvSize(img_size, img_size), 8, 3); + cvSet(img, CV_RGB(255, 255, 255), 0); + CvPoint pt1, pt2, pt_text; + CvFont font; + cvInitFont(&font, CV_FONT_HERSHEY_COMPLEX_SMALL, 0.7, 0.7, 0, 1, CV_AA); + char char_buff[100]; + int i; + // vertical lines + pt1.x = img_offset; pt2.x = img_size, pt_text.x = 10; + for (i = 1; i <= number_of_lines; ++i) { + pt1.y = pt2.y = (float)i * draw_size / number_of_lines; + cvLine(img, pt1, pt2, CV_RGB(224, 224, 224), 1, 8, 0); + if (i % 10 == 0) { + sprintf(char_buff, "%2.1f", max_img_loss*(number_of_lines - i) / number_of_lines); + pt_text.y = pt1.y + 5; + cvPutText(img, char_buff, pt_text, &font, CV_RGB(0, 0, 0)); + cvLine(img, pt1, pt2, CV_RGB(128, 128, 128), 1, 8, 0); + } + } + // horizontal lines + pt1.y = draw_size; pt2.y = 0, pt_text.y = draw_size + 15; + for (i = 0; i <= number_of_lines; ++i) { + pt1.x = pt2.x = img_offset + (float)i * draw_size / number_of_lines; + cvLine(img, pt1, pt2, CV_RGB(224, 224, 224), 1, 8, 0); + if (i % 10 == 0) { + sprintf(char_buff, "%d", max_batches * i / number_of_lines); + pt_text.x = pt1.x - 20; + cvPutText(img, char_buff, pt_text, &font, CV_RGB(0, 0, 0)); + cvLine(img, pt1, pt2, CV_RGB(128, 128, 128), 1, 8, 0); + } + } + cvPutText(img, "Iteration number", cvPoint(draw_size / 2, img_size - 10), &font, CV_RGB(0, 0, 0)); + cvPutText(img, "Press 's' to save: chart.jpg", cvPoint(5, img_size - 10), &font, CV_RGB(0, 0, 0)); + printf(" If error occurs - run training with flag: -dont_show \n"); + cvNamedWindow("average loss", CV_WINDOW_NORMAL); + cvMoveWindow("average loss", 0, 0); + cvResizeWindow("average loss", img_size, img_size); + cvShowImage("average loss", img); + cvWaitKey(20); + return img; } void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches) { - int img_offset = 50; - int draw_size = img_size - img_offset; - CvFont font; - cvInitFont(&font, CV_FONT_HERSHEY_COMPLEX_SMALL, 0.7, 0.7, 0, 1, CV_AA); - char char_buff[100]; - CvPoint pt1, pt2; - pt1.x = img_offset + draw_size * (float)current_batch / max_batches; - pt1.y = draw_size * (1 - avg_loss / max_img_loss); - if (pt1.y < 0) pt1.y = 1; - cvCircle(img, pt1, 1, CV_RGB(0, 0, 255), CV_FILLED, 8, 0); + int img_offset = 50; + int draw_size = img_size - img_offset; + CvFont font; + cvInitFont(&font, CV_FONT_HERSHEY_COMPLEX_SMALL, 0.7, 0.7, 0, 1, CV_AA); + char char_buff[100]; + CvPoint pt1, pt2; + pt1.x = img_offset + draw_size * (float)current_batch / max_batches; + pt1.y = draw_size * (1 - avg_loss / max_img_loss); + if (pt1.y < 0) pt1.y = 1; + cvCircle(img, pt1, 1, CV_RGB(0, 0, 255), CV_FILLED, 8, 0); - sprintf(char_buff, "current avg loss = %2.4f", avg_loss); - pt1.x = img_size / 2, pt1.y = 30; - pt2.x = pt1.x + 250, pt2.y = pt1.y + 20; - cvRectangle(img, pt1, pt2, CV_RGB(255, 255, 255), CV_FILLED, 8, 0); - pt1.y += 15; - cvPutText(img, char_buff, pt1, &font, CV_RGB(0, 0, 0)); - cvShowImage("average loss", img); - int k = cvWaitKey(20); - if (k == 's' || current_batch == (max_batches-1)) cvSaveImage("chart.jpg", img, 0); + sprintf(char_buff, "current avg loss = %2.4f", avg_loss); + pt1.x = img_size / 2, pt1.y = 30; + pt2.x = pt1.x + 250, pt2.y = pt1.y + 20; + cvRectangle(img, pt1, pt2, CV_RGB(255, 255, 255), CV_FILLED, 8, 0); + pt1.y += 15; + cvPutText(img, char_buff, pt1, &font, CV_RGB(0, 0, 0)); + cvShowImage("average loss", img); + int k = cvWaitKey(20); + if (k == 's' || current_batch == (max_batches-1)) cvSaveImage("chart.jpg", img, 0); } -#endif // OPENCV +#endif // OPENCV void transpose_image(image im) { @@ -909,15 +909,15 @@ void show_image_cv(image p, const char *name) void show_image_cv_ipl(IplImage *disp, const char *name) { - if (disp == NULL) return; - char buff[256]; - //sprintf(buff, "%s (%d)", name, windows); - sprintf(buff, "%s", name); - cvNamedWindow(buff, CV_WINDOW_NORMAL); - //cvMoveWindow(buff, 100*(windows%10) + 200*(windows/10), 100*(windows%10)); - ++windows; - cvShowImage(buff, disp); - //cvReleaseImage(&disp); + if (disp == NULL) return; + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + sprintf(buff, "%s", name); + cvNamedWindow(buff, CV_WINDOW_NORMAL); + //cvMoveWindow(buff, 100*(windows%10) + 200*(windows/10), 100*(windows%10)); + ++windows; + cvShowImage(buff, disp); + //cvReleaseImage(&disp); } #endif @@ -966,22 +966,22 @@ image load_image_cv(char *filename, int channels) if( (src = cvLoadImage(filename, flag)) == 0 ) { - char shrinked_filename[1024]; - if (strlen(filename) >= 1024) sprintf(shrinked_filename, "name is too long"); - else sprintf(shrinked_filename, "%s", filename); - fprintf(stderr, "Cannot load image \"%s\"\n", shrinked_filename); - FILE* fw = fopen("bad.list", "a"); - fwrite(shrinked_filename, sizeof(char), strlen(shrinked_filename), fw); - char *new_line = "\n"; - fwrite(new_line, sizeof(char), strlen(new_line), fw); - fclose(fw); + char shrinked_filename[1024]; + if (strlen(filename) >= 1024) sprintf(shrinked_filename, "name is too long"); + else sprintf(shrinked_filename, "%s", filename); + fprintf(stderr, "Cannot load image \"%s\"\n", shrinked_filename); + FILE* fw = fopen("bad.list", "a"); + fwrite(shrinked_filename, sizeof(char), strlen(shrinked_filename), fw); + char *new_line = "\n"; + fwrite(new_line, sizeof(char), strlen(new_line), fw); + fclose(fw); return make_image(10,10,3); //exit(EXIT_FAILURE); } image out = ipl_to_image(src); cvReleaseImage(&src); - if (out.c > 1) - rgbgr_image(out); + if (out.c > 1) + rgbgr_image(out); return out; } @@ -996,119 +996,119 @@ image get_image_from_stream(CvCapture *cap) image get_image_from_stream_cpp(CvCapture *cap) { - //IplImage* src = cvQueryFrame(cap); - IplImage* src; - static int once = 1; - if (once) { - once = 0; - do { - src = get_webcam_frame(cap); - if (!src) return make_empty_image(0, 0, 0); - } while (src->width < 1 || src->height < 1 || src->nChannels < 1); - printf("Video stream: %d x %d \n", src->width, src->height); - } - else - src = get_webcam_frame(cap); + //IplImage* src = cvQueryFrame(cap); + IplImage* src; + static int once = 1; + if (once) { + once = 0; + do { + src = get_webcam_frame(cap); + if (!src) return make_empty_image(0, 0, 0); + } while (src->width < 1 || src->height < 1 || src->nChannels < 1); + printf("Video stream: %d x %d \n", src->width, src->height); + } + else + src = get_webcam_frame(cap); - if (!src) return make_empty_image(0, 0, 0); - image im = ipl_to_image(src); - rgbgr_image(im); - return im; + if (!src) return make_empty_image(0, 0, 0); + image im = ipl_to_image(src); + rgbgr_image(im); + return im; } int wait_for_stream(CvCapture *cap, IplImage* src, int dont_close) { - if (!src) { - if (dont_close) src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, 3); - else return 0; - } - if (src->width < 1 || src->height < 1 || src->nChannels < 1) { - if (dont_close) { - cvReleaseImage(&src); - int z = 0; - for (z = 0; z < 20; ++z) { - get_webcam_frame(cap); - cvReleaseImage(&src); - } - src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, 3); - } - else return 0; - } - return 1; + if (!src) { + if (dont_close) src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, 3); + else return 0; + } + if (src->width < 1 || src->height < 1 || src->nChannels < 1) { + if (dont_close) { + cvReleaseImage(&src); + int z = 0; + for (z = 0; z < 20; ++z) { + get_webcam_frame(cap); + cvReleaseImage(&src); + } + src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, 3); + } + else return 0; + } + return 1; } image get_image_from_stream_resize(CvCapture *cap, int w, int h, int c, IplImage** in_img, int cpp_video_capture, int dont_close) { - c = c ? c : 3; - IplImage* src; - if (cpp_video_capture) { - static int once = 1; - if (once) { - once = 0; - do { - src = get_webcam_frame(cap); - if (!src) return make_empty_image(0, 0, 0); - } while (src->width < 1 || src->height < 1 || src->nChannels < 1); - printf("Video stream: %d x %d \n", src->width, src->height); - } else - src = get_webcam_frame(cap); - } - else src = cvQueryFrame(cap); + c = c ? c : 3; + IplImage* src; + if (cpp_video_capture) { + static int once = 1; + if (once) { + once = 0; + do { + src = get_webcam_frame(cap); + if (!src) return make_empty_image(0, 0, 0); + } while (src->width < 1 || src->height < 1 || src->nChannels < 1); + printf("Video stream: %d x %d \n", src->width, src->height); + } else + src = get_webcam_frame(cap); + } + else src = cvQueryFrame(cap); - if (cpp_video_capture) - if(!wait_for_stream(cap, src, dont_close)) return make_empty_image(0, 0, 0); - IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, c); - *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, c); - cvResize(src, *in_img, CV_INTER_LINEAR); - cvResize(src, new_img, CV_INTER_LINEAR); - image im = ipl_to_image(new_img); - cvReleaseImage(&new_img); - if (cpp_video_capture) cvReleaseImage(&src); - if (c>1) - rgbgr_image(im); - return im; + if (cpp_video_capture) + if(!wait_for_stream(cap, src, dont_close)) return make_empty_image(0, 0, 0); + IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, c); + *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, c); + cvResize(src, *in_img, CV_INTER_LINEAR); + cvResize(src, new_img, CV_INTER_LINEAR); + image im = ipl_to_image(new_img); + cvReleaseImage(&new_img); + if (cpp_video_capture) cvReleaseImage(&src); + if (c>1) + rgbgr_image(im); + return im; } image get_image_from_stream_letterbox(CvCapture *cap, int w, int h, int c, IplImage** in_img, int cpp_video_capture, int dont_close) { - c = c ? c : 3; - IplImage* src; - if (cpp_video_capture) { - static int once = 1; - if (once) { - once = 0; - do { - src = get_webcam_frame(cap); - if (!src) return make_empty_image(0, 0, 0); - } while (src->width < 1 || src->height < 1 || src->nChannels < 1); - printf("Video stream: %d x %d \n", src->width, src->height); - } - else - src = get_webcam_frame(cap); - } - else src = cvQueryFrame(cap); + c = c ? c : 3; + IplImage* src; + if (cpp_video_capture) { + static int once = 1; + if (once) { + once = 0; + do { + src = get_webcam_frame(cap); + if (!src) return make_empty_image(0, 0, 0); + } while (src->width < 1 || src->height < 1 || src->nChannels < 1); + printf("Video stream: %d x %d \n", src->width, src->height); + } + else + src = get_webcam_frame(cap); + } + else src = cvQueryFrame(cap); - if (cpp_video_capture) - if (!wait_for_stream(cap, src, dont_close)) return make_empty_image(0, 0, 0); - *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, c); - cvResize(src, *in_img, CV_INTER_LINEAR); - image tmp = ipl_to_image(src); - image im = letterbox_image(tmp, w, h); - free_image(tmp); - if (cpp_video_capture) cvReleaseImage(&src); - if (c>1) rgbgr_image(im); - return im; + if (cpp_video_capture) + if (!wait_for_stream(cap, src, dont_close)) return make_empty_image(0, 0, 0); + *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, c); + cvResize(src, *in_img, CV_INTER_LINEAR); + image tmp = ipl_to_image(src); + image im = letterbox_image(tmp, w, h); + free_image(tmp); + if (cpp_video_capture) cvReleaseImage(&src); + if (c>1) rgbgr_image(im); + return im; } int get_stream_fps(CvCapture *cap, int cpp_video_capture) { - int fps = 25; - if (cpp_video_capture) { - fps = get_stream_fps_cpp(cap); - } - else { - fps = cvGetCaptureProperty(cap, CV_CAP_PROP_FPS); - } - return fps; + int fps = 25; + if (cpp_video_capture) { + fps = get_stream_fps_cpp(cap); + } + else { + fps = cvGetCaptureProperty(cap, CV_CAP_PROP_FPS); + } + return fps; } void save_image_jpg(image p, const char *name) @@ -1358,47 +1358,47 @@ void composite_3d(char *f1, char *f2, char *out, int delta) void fill_image(image m, float s) { - int i; - for (i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; + int i; + for (i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; } void letterbox_image_into(image im, int w, int h, image boxed) { - int new_w = im.w; - int new_h = im.h; - if (((float)w / im.w) < ((float)h / im.h)) { - new_w = w; - new_h = (im.h * w) / im.w; - } - else { - new_h = h; - new_w = (im.w * h) / im.h; - } - image resized = resize_image(im, new_w, new_h); - embed_image(resized, boxed, (w - new_w) / 2, (h - new_h) / 2); - free_image(resized); + int new_w = im.w; + int new_h = im.h; + if (((float)w / im.w) < ((float)h / im.h)) { + new_w = w; + new_h = (im.h * w) / im.w; + } + else { + new_h = h; + new_w = (im.w * h) / im.h; + } + image resized = resize_image(im, new_w, new_h); + embed_image(resized, boxed, (w - new_w) / 2, (h - new_h) / 2); + free_image(resized); } image letterbox_image(image im, int w, int h) { - int new_w = im.w; - int new_h = im.h; - if (((float)w / im.w) < ((float)h / im.h)) { - new_w = w; - new_h = (im.h * w) / im.w; - } - else { - new_h = h; - new_w = (im.w * h) / im.h; - } - image resized = resize_image(im, new_w, new_h); - image boxed = make_image(w, h, im.c); - fill_image(boxed, .5); - //int i; - //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; - embed_image(resized, boxed, (w - new_w) / 2, (h - new_h) / 2); - free_image(resized); - return boxed; + int new_w = im.w; + int new_h = im.h; + if (((float)w / im.w) < ((float)h / im.h)) { + new_w = w; + new_h = (im.h * w) / im.w; + } + else { + new_h = h; + new_w = (im.w * h) / im.h; + } + image resized = resize_image(im, new_w, new_h); + image boxed = make_image(w, h, im.c); + fill_image(boxed, .5); + //int i; + //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; + embed_image(resized, boxed, (w - new_w) / 2, (h - new_h) / 2); + free_image(resized); + return boxed; } image resize_max(image im, int max) @@ -1660,23 +1660,23 @@ void exposure_image(image im, float sat) void distort_image(image im, float hue, float sat, float val) { - if (im.c >= 3) - { - rgb_to_hsv(im); - scale_image_channel(im, 1, sat); - scale_image_channel(im, 2, val); - int i; - for(i = 0; i < im.w*im.h; ++i){ - im.data[i] = im.data[i] + hue; - if (im.data[i] > 1) im.data[i] -= 1; - if (im.data[i] < 0) im.data[i] += 1; - } - hsv_to_rgb(im); - } - else - { - scale_image_channel(im, 0, val); - } + if (im.c >= 3) + { + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, val); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + } + else + { + scale_image_channel(im, 0, val); + } constrain_image(im); } @@ -1812,16 +1812,16 @@ image load_image_stb(char *filename, int channels) int w, h, c; unsigned char *data = stbi_load(filename, &w, &h, &c, channels); if (!data) { - char shrinked_filename[1024]; - if (strlen(filename) >= 1024) sprintf(shrinked_filename, "name is too long"); - else sprintf(shrinked_filename, "%s", filename); - fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", shrinked_filename, stbi_failure_reason()); - FILE* fw = fopen("bad.list", "a"); - fwrite(shrinked_filename, sizeof(char), strlen(shrinked_filename), fw); - char *new_line = "\n"; - fwrite(new_line, sizeof(char), strlen(new_line), fw); - fclose(fw); - return make_image(10, 10, 3); + char shrinked_filename[1024]; + if (strlen(filename) >= 1024) sprintf(shrinked_filename, "name is too long"); + else sprintf(shrinked_filename, "%s", filename); + fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", shrinked_filename, stbi_failure_reason()); + FILE* fw = fopen("bad.list", "a"); + fwrite(shrinked_filename, sizeof(char), strlen(shrinked_filename), fw); + char *new_line = "\n"; + fwrite(new_line, sizeof(char), strlen(new_line), fw); + fclose(fw); + return make_image(10, 10, 3); //exit(EXIT_FAILURE); } if(channels) c = channels; @@ -1845,14 +1845,14 @@ image load_image(char *filename, int w, int h, int c) #ifdef OPENCV #ifndef CV_VERSION_EPOCH - //image out = load_image_stb(filename, c); // OpenCV 3.x - image out = load_image_cv(filename, c); + //image out = load_image_stb(filename, c); // OpenCV 3.x + image out = load_image_cv(filename, c); #else - image out = load_image_cv(filename, c); // OpenCV 2.4.x + image out = load_image_cv(filename, c); // OpenCV 2.4.x #endif #else - image out = load_image_stb(filename, c); // without OpenCV + image out = load_image_stb(filename, c); // without OpenCV #endif if((h && w) && (h != out.h || w != out.w)){ diff --git a/src/list.c b/src/list.c index e83f63ef..39e3033d 100644 --- a/src/list.c +++ b/src/list.c @@ -5,11 +5,11 @@ list *make_list() { - list *l = malloc(sizeof(list)); - l->size = 0; - l->front = 0; - l->back = 0; - return l; + list *l = malloc(sizeof(list)); + l->size = 0; + l->front = 0; + l->back = 0; + return l; } /* @@ -40,55 +40,55 @@ void *list_pop(list *l){ void list_insert(list *l, void *val) { - node *new = malloc(sizeof(node)); - new->val = val; - new->next = 0; + node *new = malloc(sizeof(node)); + new->val = val; + new->next = 0; - if(!l->back){ - l->front = new; - new->prev = 0; - }else{ - l->back->next = new; - new->prev = l->back; - } - l->back = new; - ++l->size; + if(!l->back){ + l->front = new; + new->prev = 0; + }else{ + l->back->next = new; + new->prev = l->back; + } + l->back = new; + ++l->size; } void free_node(node *n) { - node *next; - while(n) { - next = n->next; - free(n); - n = next; - } + node *next; + while(n) { + next = n->next; + free(n); + n = next; + } } void free_list(list *l) { - free_node(l->front); - free(l); + free_node(l->front); + free(l); } void free_list_contents(list *l) { - node *n = l->front; - while(n){ - free(n->val); - n = n->next; - } + node *n = l->front; + while(n){ + free(n->val); + n = n->next; + } } void free_list_contents_kvp(list *l) { - node *n = l->front; - while (n) { - kvp *p = n->val; - free(p->key); - free(n->val); - n = n->next; - } + node *n = l->front; + while (n) { + kvp *p = n->val; + free(p->key); + free(n->val); + n = n->next; + } } void **list_to_array(list *l) diff --git a/src/network.c b/src/network.c index d135a29f..3df837d6 100644 --- a/src/network.c +++ b/src/network.c @@ -33,19 +33,19 @@ network *load_network_custom(char *cfg, char *weights, int clear, int batch) { - printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear); - network *net = calloc(1, sizeof(network)); - *net = parse_network_cfg_custom(cfg, batch); - if (weights && weights[0] != 0) { - load_weights(net, weights); - } - if (clear) (*net->seen) = 0; - return net; + printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear); + network *net = calloc(1, sizeof(network)); + *net = parse_network_cfg_custom(cfg, batch); + if (weights && weights[0] != 0) { + load_weights(net, weights); + } + if (clear) (*net->seen) = 0; + return net; } network *load_network(char *cfg, char *weights, int clear) { - return load_network_custom(cfg, weights, clear, 0); + return load_network_custom(cfg, weights, clear, 0); } int get_current_batch(network net) @@ -67,23 +67,23 @@ void reset_momentum(network net) void reset_network_state(network *net, int b) { - int i; - for (i = 0; i < net->n; ++i) { + int i; + for (i = 0; i < net->n; ++i) { #ifdef GPU - layer l = net->layers[i]; - if (l.state_gpu) { - fill_ongpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); - } - if (l.h_gpu) { - fill_ongpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1); - } + layer l = net->layers[i]; + if (l.state_gpu) { + fill_ongpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); + } + if (l.h_gpu) { + fill_ongpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1); + } #endif - } + } } void reset_rnn(network *net) { - reset_network_state(net, 0); + reset_network_state(net, 0); } float get_current_rate(network net) @@ -91,7 +91,7 @@ float get_current_rate(network net) int batch_num = get_current_batch(net); int i; float rate; - if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power); + if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power); switch (net.policy) { case CONSTANT: return net.learning_rate; @@ -108,7 +108,7 @@ float get_current_rate(network net) case EXP: return net.learning_rate * pow(net.gamma, batch_num); case POLY: - return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power); + return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power); //if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power); //return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power); case RANDOM: @@ -182,10 +182,10 @@ network make_network(int n) net.input_gpu = calloc(1, sizeof(float *)); net.truth_gpu = calloc(1, sizeof(float *)); - net.input16_gpu = calloc(1, sizeof(float *)); - net.output16_gpu = calloc(1, sizeof(float *)); - net.max_input16_size = calloc(1, sizeof(size_t)); - net.max_output16_size = calloc(1, sizeof(size_t)); + net.input16_gpu = calloc(1, sizeof(float *)); + net.output16_gpu = calloc(1, sizeof(float *)); + net.max_input16_size = calloc(1, sizeof(size_t)); + net.max_output16_size = calloc(1, sizeof(size_t)); #endif return net; } @@ -362,20 +362,20 @@ void set_batch_network(network *net, int b) net->layers[i].batch = b; #ifdef CUDNN if(net->layers[i].type == CONVOLUTIONAL){ - cudnn_convolutional_setup(net->layers + i, cudnn_fastest); - /* - layer *l = net->layers + i; + cudnn_convolutional_setup(net->layers + i, cudnn_fastest); + /* + layer *l = net->layers + i; cudnn_convolutional_setup(l, cudnn_fastest); - // check for excessive memory consumption - size_t free_byte; - size_t total_byte; - check_error(cudaMemGetInfo(&free_byte, &total_byte)); - if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { - printf(" used slow CUDNN algo without Workspace! \n"); - cudnn_convolutional_setup(l, cudnn_smallest); - l->workspace_size = get_workspace_size(*l); - } - */ + // check for excessive memory consumption + size_t free_byte; + size_t total_byte; + check_error(cudaMemGetInfo(&free_byte, &total_byte)); + if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { + printf(" used slow CUDNN algo without Workspace! \n"); + cudnn_convolutional_setup(l, cudnn_smallest); + l->workspace_size = get_workspace_size(*l); + } + */ } #endif } @@ -387,12 +387,12 @@ int resize_network(network *net, int w, int h) cuda_set_device(net->gpu_index); if(gpu_index >= 0){ cuda_free(net->workspace); - if (net->input_gpu) { - cuda_free(*net->input_gpu); - *net->input_gpu = 0; - cuda_free(*net->truth_gpu); - *net->truth_gpu = 0; - } + if (net->input_gpu) { + cuda_free(*net->input_gpu); + *net->input_gpu = 0; + cuda_free(*net->truth_gpu); + *net->truth_gpu = 0; + } } #endif int i; @@ -405,7 +405,7 @@ int resize_network(network *net, int w, int h) //fflush(stderr); for (i = 0; i < net->n; ++i){ layer l = net->layers[i]; - //printf(" %d: layer = %d,", i, l.type); + //printf(" %d: layer = %d,", i, l.type); if(l.type == CONVOLUTIONAL){ resize_convolutional_layer(&l, w, h); }else if(l.type == CROP){ @@ -414,14 +414,14 @@ int resize_network(network *net, int w, int h) resize_maxpool_layer(&l, w, h); }else if(l.type == REGION){ resize_region_layer(&l, w, h); - }else if (l.type == YOLO) { - resize_yolo_layer(&l, w, h); + }else if (l.type == YOLO) { + resize_yolo_layer(&l, w, h); }else if(l.type == ROUTE){ resize_route_layer(&l, net); - }else if (l.type == SHORTCUT) { - resize_shortcut_layer(&l, w, h); - }else if (l.type == UPSAMPLE) { - resize_upsample_layer(&l, w, h); + }else if (l.type == SHORTCUT) { + resize_shortcut_layer(&l, w, h); + }else if (l.type == UPSAMPLE) { + resize_upsample_layer(&l, w, h); }else if(l.type == REORG){ resize_reorg_layer(&l, w, h); }else if(l.type == AVGPOOL){ @@ -431,7 +431,7 @@ int resize_network(network *net, int w, int h) }else if(l.type == COST){ resize_cost_layer(&l, inputs); }else{ - fprintf(stderr, "Resizing type %d \n", (int)l.type); + fprintf(stderr, "Resizing type %d \n", (int)l.type); error("Cannot resize this type of layer"); } if(l.workspace_size > workspace_size) workspace_size = l.workspace_size; @@ -443,9 +443,9 @@ int resize_network(network *net, int w, int h) } #ifdef GPU if(gpu_index >= 0){ - printf(" try to allocate workspace = %zu * sizeof(float), ", workspace_size / sizeof(float) + 1); + printf(" try to allocate workspace = %zu * sizeof(float), ", workspace_size / sizeof(float) + 1); net->workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); - printf(" CUDA allocate done! \n"); + printf(" CUDA allocate done! \n"); }else { free(net->workspace); net->workspace = calloc(1, workspace_size); @@ -551,112 +551,112 @@ float *network_predict(network net, float *input) int num_detections(network *net, float thresh) { - int i; - int s = 0; - for (i = 0; i < net->n; ++i) { - layer l = net->layers[i]; - if (l.type == YOLO) { - s += yolo_num_detections(l, thresh); - } - if (l.type == DETECTION || l.type == REGION) { - s += l.w*l.h*l.n; - } - } - return s; + int i; + int s = 0; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == YOLO) { + s += yolo_num_detections(l, thresh); + } + if (l.type == DETECTION || l.type == REGION) { + s += l.w*l.h*l.n; + } + } + return s; } detection *make_network_boxes(network *net, float thresh, int *num) { - layer l = net->layers[net->n - 1]; - int i; - int nboxes = num_detections(net, thresh); - if (num) *num = nboxes; - detection *dets = calloc(nboxes, sizeof(detection)); - for (i = 0; i < nboxes; ++i) { - dets[i].prob = calloc(l.classes, sizeof(float)); - if (l.coords > 4) { - dets[i].mask = calloc(l.coords - 4, sizeof(float)); - } - } - return dets; + layer l = net->layers[net->n - 1]; + int i; + int nboxes = num_detections(net, thresh); + if (num) *num = nboxes; + detection *dets = calloc(nboxes, sizeof(detection)); + for (i = 0; i < nboxes; ++i) { + dets[i].prob = calloc(l.classes, sizeof(float)); + if (l.coords > 4) { + dets[i].mask = calloc(l.coords - 4, sizeof(float)); + } + } + return dets; } void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter) { - box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); - float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); - int i, j; - for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float)); - get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map); - for (j = 0; j < l.w*l.h*l.n; ++j) { - dets[j].classes = l.classes; - dets[j].bbox = boxes[j]; - dets[j].objectness = 1; - for (i = 0; i < l.classes; ++i) { - dets[j].prob[i] = probs[j][i]; - } - } + box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); + float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); + int i, j; + for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float)); + get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map); + for (j = 0; j < l.w*l.h*l.n; ++j) { + dets[j].classes = l.classes; + dets[j].bbox = boxes[j]; + dets[j].objectness = 1; + for (i = 0; i < l.classes; ++i) { + dets[j].prob[i] = probs[j][i]; + } + } - free(boxes); - free_ptrs((void **)probs, l.w*l.h*l.n); + free(boxes); + free_ptrs((void **)probs, l.w*l.h*l.n); - //correct_region_boxes(dets, l.w*l.h*l.n, w, h, net_w, net_h, relative); - correct_yolo_boxes(dets, l.w*l.h*l.n, w, h, net_w, net_h, relative, letter); + //correct_region_boxes(dets, l.w*l.h*l.n, w, h, net_w, net_h, relative); + correct_yolo_boxes(dets, l.w*l.h*l.n, w, h, net_w, net_h, relative, letter); } void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets, int letter) { - int prev_classes = -1; - int j; - for (j = 0; j < net->n; ++j) { - layer l = net->layers[j]; - if (l.type == YOLO) { - int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets, letter); - dets += count; - if (prev_classes < 0) prev_classes = l.classes; - else if (prev_classes != l.classes) { - printf(" Error: Different [yolo] layers have different number of classes = %d and %d - check your cfg-file! \n", - prev_classes, l.classes); - } - } - if (l.type == REGION) { - custom_get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets, letter); - //get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); - dets += l.w*l.h*l.n; - } - if (l.type == DETECTION) { - get_detection_detections(l, w, h, thresh, dets); - dets += l.w*l.h*l.n; - } - } + int prev_classes = -1; + int j; + for (j = 0; j < net->n; ++j) { + layer l = net->layers[j]; + if (l.type == YOLO) { + int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets, letter); + dets += count; + if (prev_classes < 0) prev_classes = l.classes; + else if (prev_classes != l.classes) { + printf(" Error: Different [yolo] layers have different number of classes = %d and %d - check your cfg-file! \n", + prev_classes, l.classes); + } + } + if (l.type == REGION) { + custom_get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets, letter); + //get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); + dets += l.w*l.h*l.n; + } + if (l.type == DETECTION) { + get_detection_detections(l, w, h, thresh, dets); + dets += l.w*l.h*l.n; + } + } } detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter) { - detection *dets = make_network_boxes(net, thresh, num); - fill_network_boxes(net, w, h, thresh, hier, map, relative, dets, letter); - return dets; + detection *dets = make_network_boxes(net, thresh, num); + fill_network_boxes(net, w, h, thresh, hier, map, relative, dets, letter); + return dets; } void free_detections(detection *dets, int n) { - int i; - for (i = 0; i < n; ++i) { - free(dets[i].prob); - if (dets[i].mask) free(dets[i].mask); - } - free(dets); + int i; + for (i = 0; i < n; ++i) { + free(dets[i].prob); + if (dets[i].mask) free(dets[i].mask); + } + free(dets); } float *network_predict_image(network *net, image im) { - //image imr = letterbox_image(im, net->w, net->h); - image imr = resize_image(im, net->w, net->h); - set_batch_network(net, 1); - float *p = network_predict(*net, imr.data); - free_image(imr); - return p; + //image imr = letterbox_image(im, net->w, net->h); + image imr = resize_image(im, net->w, net->h); + set_batch_network(net, 1); + float *p = network_predict(*net, imr.data); + free_image(imr); + return p; } int network_width(network *net) { return net->w; } @@ -780,70 +780,70 @@ float network_accuracy_multi(network net, data d, int n) void free_network(network net) { - int i; - for (i = 0; i < net.n; ++i) { - free_layer(net.layers[i]); - } - free(net.layers); + int i; + for (i = 0; i < net.n; ++i) { + free_layer(net.layers[i]); + } + free(net.layers); - free(net.scales); - free(net.steps); - free(net.seen); + free(net.scales); + free(net.steps); + free(net.seen); #ifdef GPU - if (gpu_index >= 0) cuda_free(net.workspace); - else free(net.workspace); - if (*net.input_gpu) cuda_free(*net.input_gpu); - if (*net.truth_gpu) cuda_free(*net.truth_gpu); - if (net.input_gpu) free(net.input_gpu); - if (net.truth_gpu) free(net.truth_gpu); + if (gpu_index >= 0) cuda_free(net.workspace); + else free(net.workspace); + if (*net.input_gpu) cuda_free(*net.input_gpu); + if (*net.truth_gpu) cuda_free(*net.truth_gpu); + if (net.input_gpu) free(net.input_gpu); + if (net.truth_gpu) free(net.truth_gpu); - if (*net.input16_gpu) cuda_free(*net.input16_gpu); - if (*net.output16_gpu) cuda_free(*net.output16_gpu); - if (net.input16_gpu) free(net.input16_gpu); - if (net.output16_gpu) free(net.output16_gpu); - if (net.max_input16_size) free(net.max_input16_size); - if (net.max_output16_size) free(net.max_output16_size); + if (*net.input16_gpu) cuda_free(*net.input16_gpu); + if (*net.output16_gpu) cuda_free(*net.output16_gpu); + if (net.input16_gpu) free(net.input16_gpu); + if (net.output16_gpu) free(net.output16_gpu); + if (net.max_input16_size) free(net.max_input16_size); + if (net.max_output16_size) free(net.max_output16_size); #else - free(net.workspace); + free(net.workspace); #endif } void fuse_conv_batchnorm(network net) { - int j; - for (j = 0; j < net.n; ++j) { - layer *l = &net.layers[j]; + int j; + for (j = 0; j < net.n; ++j) { + layer *l = &net.layers[j]; - if (l->type == CONVOLUTIONAL) { - //printf(" Merges Convolutional-%d and batch_norm \n", j); + if (l->type == CONVOLUTIONAL) { + //printf(" Merges Convolutional-%d and batch_norm \n", j); - if (l->batch_normalize) { - int f; - for (f = 0; f < l->n; ++f) - { - l->biases[f] = l->biases[f] - (double)l->scales[f] * l->rolling_mean[f] / (sqrt((double)l->rolling_variance[f]) + .000001f); + if (l->batch_normalize) { + int f; + for (f = 0; f < l->n; ++f) + { + l->biases[f] = l->biases[f] - (double)l->scales[f] * l->rolling_mean[f] / (sqrt((double)l->rolling_variance[f]) + .000001f); - const size_t filter_size = l->size*l->size*l->c; - int i; - for (i = 0; i < filter_size; ++i) { - int w_index = f*filter_size + i; + const size_t filter_size = l->size*l->size*l->c; + int i; + for (i = 0; i < filter_size; ++i) { + int w_index = f*filter_size + i; - l->weights[w_index] = (double)l->weights[w_index] * l->scales[f] / (sqrt((double)l->rolling_variance[f]) + .000001f); - } - } + l->weights[w_index] = (double)l->weights[w_index] * l->scales[f] / (sqrt((double)l->rolling_variance[f]) + .000001f); + } + } - l->batch_normalize = 0; + l->batch_normalize = 0; #ifdef GPU - if (gpu_index >= 0) { - push_convolutional_layer(*l); - } + if (gpu_index >= 0) { + push_convolutional_layer(*l); + } #endif - } - } - else { - //printf(" Fusion skip layer type: %d \n", l->type); - } - } + } + } + else { + //printf(" Fusion skip layer type: %d \n", l->type); + } + } } diff --git a/src/network_kernels.cu b/src/network_kernels.cu index a11d61f3..681542f2 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -55,23 +55,23 @@ void forward_network_gpu(network net, network_state state) fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1); } l.forward_gpu(l, state); - if(net.wait_stream) - cudaStreamSynchronize(get_cuda_stream()); + if(net.wait_stream) + cudaStreamSynchronize(get_cuda_stream()); state.input = l.output_gpu; /* - cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); - if (l.out_w >= 0 && l.out_h >= 1 && l.c >= 3) { - int j; - for (j = 0; j < l.out_c; ++j) { - image img = make_image(l.out_w, l.out_h, 3); - memcpy(img.data, l.output+ l.out_w*l.out_h*j, l.out_w*l.out_h * 1 * sizeof(float)); - char buff[256]; - sprintf(buff, "layer-%d slice-%d", i, j); - show_image(img, buff); - } - cvWaitKey(0); // wait press-key in console - cvDestroyAllWindows(); - } + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + if (l.out_w >= 0 && l.out_h >= 1 && l.c >= 3) { + int j; + for (j = 0; j < l.out_c; ++j) { + image img = make_image(l.out_w, l.out_h, 3); + memcpy(img.data, l.output+ l.out_w*l.out_h*j, l.out_w*l.out_h * 1 * sizeof(float)); + char buff[256]; + sprintf(buff, "layer-%d slice-%d", i, j); + show_image(img, buff); + } + cvWaitKey(0); // wait press-key in console + cvDestroyAllWindows(); + } */ } } @@ -133,14 +133,14 @@ void forward_backward_network_gpu(network net, float *x, float *y) state.truth = *net.truth_gpu; state.train = 1; #ifdef CUDNN_HALF - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - cuda_convert_f32_to_f16(l.weights_gpu, l.c*l.n*l.size*l.size, l.weights_gpu16); - } + int i; + for (i = 0; i < net.n; ++i) { + layer l = net.layers[i]; + cuda_convert_f32_to_f16(l.weights_gpu, l.c*l.n*l.size*l.size, l.weights_gpu16); + } #endif forward_network_gpu(net, state); - //cudaStreamSynchronize(get_cuda_stream()); + //cudaStreamSynchronize(get_cuda_stream()); backward_network_gpu(net, state); } @@ -421,8 +421,8 @@ float *get_network_output_gpu(network net) float *network_predict_gpu(network net, float *input) { - if (net.gpu_index != cuda_get_device()) - cuda_set_device(net.gpu_index); + if (net.gpu_index != cuda_get_device()) + cuda_set_device(net.gpu_index); int size = get_network_input_size(net) * net.batch; network_state state; state.index = 0; diff --git a/src/option_list.c b/src/option_list.c index 3e835de2..dfab81b0 100644 --- a/src/option_list.c +++ b/src/option_list.c @@ -34,21 +34,21 @@ list *read_data_cfg(char *filename) metadata get_metadata(char *file) { - metadata m = { 0 }; - list *options = read_data_cfg(file); + metadata m = { 0 }; + list *options = read_data_cfg(file); - char *name_list = option_find_str(options, "names", 0); - if (!name_list) name_list = option_find_str(options, "labels", 0); - if (!name_list) { - fprintf(stderr, "No names or labels found\n"); - } - else { - m.names = get_labels(name_list); - } - m.classes = option_find_int(options, "classes", 2); - free_list(options); - printf("Loaded - names_list: %s, classes = %d \n", name_list, m.classes); - return m; + char *name_list = option_find_str(options, "names", 0); + if (!name_list) name_list = option_find_str(options, "labels", 0); + if (!name_list) { + fprintf(stderr, "No names or labels found\n"); + } + else { + m.names = get_labels(name_list); + } + m.classes = option_find_int(options, "classes", 2); + free_list(options); + printf("Loaded - names_list: %s, classes = %d \n", name_list, m.classes); + return m; } int read_option(char *s, list *options) diff --git a/src/parser.c b/src/parser.c index 1a324078..d91b1cab 100644 --- a/src/parser.c +++ b/src/parser.c @@ -49,7 +49,7 @@ LAYER_TYPE string_to_layer_type(char * type) if (strcmp(type, "[cost]")==0) return COST; if (strcmp(type, "[detection]")==0) return DETECTION; if (strcmp(type, "[region]")==0) return REGION; - if (strcmp(type, "[yolo]") == 0) return YOLO; + if (strcmp(type, "[yolo]") == 0) return YOLO; if (strcmp(type, "[local]")==0) return LOCAL; if (strcmp(type, "[conv]")==0 || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; @@ -64,7 +64,7 @@ LAYER_TYPE string_to_layer_type(char * type) if (strcmp(type, "[max]")==0 || strcmp(type, "[maxpool]")==0) return MAXPOOL; if (strcmp(type, "[reorg]")==0) return REORG; - if (strcmp(type, "[reorg_old]") == 0) return REORG_OLD; + if (strcmp(type, "[reorg_old]") == 0) return REORG_OLD; if (strcmp(type, "[avg]")==0 || strcmp(type, "[avgpool]")==0) return AVGPOOL; if (strcmp(type, "[dropout]")==0) return DROPOUT; @@ -74,7 +74,7 @@ LAYER_TYPE string_to_layer_type(char * type) if (strcmp(type, "[soft]")==0 || strcmp(type, "[softmax]")==0) return SOFTMAX; if (strcmp(type, "[route]")==0) return ROUTE; - if (strcmp(type, "[upsample]") == 0) return UPSAMPLE; + if (strcmp(type, "[upsample]") == 0) return UPSAMPLE; return BLANK; } @@ -241,68 +241,68 @@ softmax_layer parse_softmax(list *options, size_params params) int *parse_yolo_mask(char *a, int *num) { - int *mask = 0; - if (a) { - int len = strlen(a); - int n = 1; - int i; - for (i = 0; i < len; ++i) { - if (a[i] == ',') ++n; - } - mask = calloc(n, sizeof(int)); - for (i = 0; i < n; ++i) { - int val = atoi(a); - mask[i] = val; - a = strchr(a, ',') + 1; - } - *num = n; - } - return mask; + int *mask = 0; + if (a) { + int len = strlen(a); + int n = 1; + int i; + for (i = 0; i < len; ++i) { + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for (i = 0; i < n; ++i) { + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',') + 1; + } + *num = n; + } + return mask; } layer parse_yolo(list *options, size_params params) { - int classes = option_find_int(options, "classes", 20); - int total = option_find_int(options, "num", 1); - int num = total; + int classes = option_find_int(options, "classes", 20); + int total = option_find_int(options, "num", 1); + int num = total; - char *a = option_find_str(options, "mask", 0); - int *mask = parse_yolo_mask(a, &num); - int max_boxes = option_find_int_quiet(options, "max", 90); - layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); - if (l.outputs != params.inputs) { - printf("Error: l.outputs == params.inputs \n"); - printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer \n"); - exit(EXIT_FAILURE); - } - //assert(l.outputs == params.inputs); + char *a = option_find_str(options, "mask", 0); + int *mask = parse_yolo_mask(a, &num); + int max_boxes = option_find_int_quiet(options, "max", 90); + layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); + if (l.outputs != params.inputs) { + printf("Error: l.outputs == params.inputs \n"); + printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer \n"); + exit(EXIT_FAILURE); + } + //assert(l.outputs == params.inputs); - //l.max_boxes = option_find_int_quiet(options, "max", 90); - l.jitter = option_find_float(options, "jitter", .2); - l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); + //l.max_boxes = option_find_int_quiet(options, "max", 90); + l.jitter = option_find_float(options, "jitter", .2); + l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); - l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); - l.truth_thresh = option_find_float(options, "truth_thresh", 1); - l.random = option_find_int_quiet(options, "random", 0); + l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.random = option_find_int_quiet(options, "random", 0); - char *map_file = option_find_str(options, "map", 0); - if (map_file) l.map = read_map(map_file); + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); - a = option_find_str(options, "anchors", 0); - if (a) { - int len = strlen(a); - int n = 1; - int i; - for (i = 0; i < len; ++i) { - if (a[i] == ',') ++n; - } - for (i = 0; i < n && i < total*2; ++i) { - float bias = atof(a); - l.biases[i] = bias; - a = strchr(a, ',') + 1; - } - } - return l; + a = option_find_str(options, "anchors", 0); + if (a) { + int len = strlen(a); + int n = 1; + int i; + for (i = 0; i < len; ++i) { + if (a[i] == ',') ++n; + } + for (i = 0; i < n && i < total*2; ++i) { + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',') + 1; + } + } + return l; } layer parse_region(list *options, size_params params) @@ -310,21 +310,21 @@ layer parse_region(list *options, size_params params) int coords = option_find_int(options, "coords", 4); int classes = option_find_int(options, "classes", 20); int num = option_find_int(options, "num", 1); - int max_boxes = option_find_int_quiet(options, "max", 90); + int max_boxes = option_find_int_quiet(options, "max", 90); layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes); - if (l.outputs != params.inputs) { - printf("Error: l.outputs == params.inputs \n"); - printf("filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer \n"); - exit(EXIT_FAILURE); - } + if (l.outputs != params.inputs) { + printf("Error: l.outputs == params.inputs \n"); + printf("filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer \n"); + exit(EXIT_FAILURE); + } //assert(l.outputs == params.inputs); l.log = option_find_int_quiet(options, "log", 0); l.sqrt = option_find_int_quiet(options, "sqrt", 0); l.softmax = option_find_int(options, "softmax", 0); - l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); + l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); //l.max_boxes = option_find_int_quiet(options, "max",30); l.jitter = option_find_float(options, "jitter", .2); l.rescore = option_find_int_quiet(options, "rescore",0); @@ -337,7 +337,7 @@ layer parse_region(list *options, size_params params) l.coord_scale = option_find_float(options, "coord_scale", 1); l.object_scale = option_find_float(options, "object_scale", 1); l.noobject_scale = option_find_float(options, "noobject_scale", 1); - l.mask_scale = option_find_float(options, "mask_scale", 1); + l.mask_scale = option_find_float(options, "mask_scale", 1); l.class_scale = option_find_float(options, "class_scale", 1); l.bias_match = option_find_int_quiet(options, "bias_match",0); @@ -438,19 +438,19 @@ layer parse_reorg(list *options, size_params params) layer parse_reorg_old(list *options, size_params params) { - printf("\n reorg_old \n"); - int stride = option_find_int(options, "stride", 1); - int reverse = option_find_int_quiet(options, "reverse", 0); + printf("\n reorg_old \n"); + int stride = option_find_int(options, "stride", 1); + int reverse = option_find_int_quiet(options, "reverse", 0); - int batch, h, w, c; - h = params.h; - w = params.w; - c = params.c; - batch = params.batch; - if (!(h && w && c)) error("Layer before reorg layer must output image."); + int batch, h, w, c; + h = params.h; + w = params.w; + c = params.c; + batch = params.batch; + if (!(h && w && c)) error("Layer before reorg layer must output image."); - layer layer = make_reorg_old_layer(batch, w, h, c, stride, reverse); - return layer; + layer layer = make_reorg_old_layer(batch, w, h, c, stride, reverse); + return layer; } maxpool_layer parse_maxpool(list *options, size_params params) @@ -547,10 +547,10 @@ layer parse_activation(list *options, size_params params) layer parse_upsample(list *options, size_params params, network net) { - int stride = option_find_int(options, "stride", 2); - layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride); - l.scale = option_find_float_quiet(options, "scale", 1); - return l; + int stride = option_find_int(options, "stride", 2); + layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l.scale = option_find_float_quiet(options, "scale", 1); + return l; } route_layer parse_route(list *options, size_params params, network net) @@ -632,15 +632,15 @@ void parse_net_options(list *options, network *net) net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); net->min_crop = option_find_int_quiet(options, "min_crop",net->w); - net->flip = option_find_int_quiet(options, "flip", 1); + net->flip = option_find_int_quiet(options, "flip", 1); - net->small_object = option_find_int_quiet(options, "small_object", 0); + net->small_object = option_find_int_quiet(options, "small_object", 0); net->angle = option_find_float_quiet(options, "angle", 0); net->aspect = option_find_float_quiet(options, "aspect", 1); net->saturation = option_find_float_quiet(options, "saturation", 1); net->exposure = option_find_float_quiet(options, "exposure", 1); net->hue = option_find_float_quiet(options, "hue", 0); - net->power = option_find_float_quiet(options, "power", 4); + net->power = option_find_float_quiet(options, "power", 4); if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); @@ -648,7 +648,7 @@ void parse_net_options(list *options, network *net) net->policy = get_policy(policy_s); net->burn_in = option_find_int_quiet(options, "burn_in", 0); #ifdef CUDNN_HALF - net->burn_in = 0; + net->burn_in = 0; #endif if(net->policy == STEP){ net->step = option_find_int(options, "step", 1); @@ -696,7 +696,7 @@ int is_network(section *s) network parse_network_cfg(char *filename) { - return parse_network_cfg_custom(filename, 0); + return parse_network_cfg_custom(filename, 0); } network parse_network_cfg_custom(char *filename, int batch) @@ -717,12 +717,12 @@ network parse_network_cfg_custom(char *filename, int batch) params.w = net.w; params.c = net.c; params.inputs = net.inputs; - if (batch > 0) net.batch = batch; + if (batch > 0) net.batch = batch; params.batch = net.batch; params.time_steps = net.time_steps; params.net = net; - float bflops = 0; + float bflops = 0; size_t workspace_size = 0; n = n->next; int count = 0; @@ -755,8 +755,8 @@ network parse_network_cfg_custom(char *filename, int batch) l = parse_cost(options, params); }else if(lt == REGION){ l = parse_region(options, params); - }else if (lt == YOLO) { - l = parse_yolo(options, params); + }else if (lt == YOLO) { + l = parse_yolo(options, params); }else if(lt == DETECTION){ l = parse_detection(options, params); }else if(lt == SOFTMAX){ @@ -769,15 +769,15 @@ network parse_network_cfg_custom(char *filename, int batch) }else if(lt == MAXPOOL){ l = parse_maxpool(options, params); }else if(lt == REORG){ - l = parse_reorg(options, params); } - else if (lt == REORG_OLD) { - l = parse_reorg_old(options, params); + l = parse_reorg(options, params); } + else if (lt == REORG_OLD) { + l = parse_reorg_old(options, params); }else if(lt == AVGPOOL){ l = parse_avgpool(options, params); }else if(lt == ROUTE){ l = parse_route(options, params, net); - }else if (lt == UPSAMPLE) { - l = parse_upsample(options, params, net); + }else if (lt == UPSAMPLE) { + l = parse_upsample(options, params, net); }else if(lt == SHORTCUT){ l = parse_shortcut(options, params, net); }else if(lt == DROPOUT){ @@ -807,12 +807,12 @@ network parse_network_cfg_custom(char *filename, int batch) params.c = l.out_c; params.inputs = l.outputs; } - if (l.bflops > 0) bflops += l.bflops; + if (l.bflops > 0) bflops += l.bflops; } free_list(sections); net.outputs = get_network_output_size(net); net.output = get_network_output(net); - printf("Total BFLOPS %5.3f \n", bflops); + printf("Total BFLOPS %5.3f \n", bflops); if(workspace_size){ //printf("%ld\n", workspace_size); #ifdef GPU @@ -825,11 +825,11 @@ network parse_network_cfg_custom(char *filename, int batch) net.workspace = calloc(1, workspace_size); #endif } - LAYER_TYPE lt = net.layers[net.n - 1].type; - if ((net.w % 32 != 0 || net.h % 32 != 0) && (lt == YOLO || lt == REGION || lt == DETECTION)) { - printf("\n Warning: width=%d and height=%d in cfg-file must be divisible by 32 for default networks Yolo v1/v2/v3!!! \n\n", - net.w, net.h); - } + LAYER_TYPE lt = net.layers[net.n - 1].type; + if ((net.w % 32 != 0 || net.h % 32 != 0) && (lt == YOLO || lt == REGION || lt == DETECTION)) { + printf("\n Warning: width=%d and height=%d in cfg-file must be divisible by 32 for default networks Yolo v1/v2/v3!!! \n\n", + net.w, net.h); + } return net; } @@ -1160,16 +1160,16 @@ void load_weights_upto(network *net, char *filename, int cutoff) fread(&major, sizeof(int), 1, fp); fread(&minor, sizeof(int), 1, fp); fread(&revision, sizeof(int), 1, fp); - if ((major * 10 + minor) >= 2) { - printf("\n seen 64 \n"); - uint64_t iseen = 0; - fread(&iseen, sizeof(uint64_t), 1, fp); - *net->seen = iseen; - } - else { - printf("\n seen 32 \n"); - fread(net->seen, sizeof(int), 1, fp); - } + if ((major * 10 + minor) >= 2) { + printf("\n seen 64 \n"); + uint64_t iseen = 0; + fread(&iseen, sizeof(uint64_t), 1, fp); + *net->seen = iseen; + } + else { + printf("\n seen 32 \n"); + fread(net->seen, sizeof(int), 1, fp); + } int transpose = (major > 1000) || (minor > 1000); int i; diff --git a/src/region_layer.c b/src/region_layer.c index 4e1e03a5..5167fb81 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -27,7 +27,7 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int l.bias_updates = calloc(n*2, sizeof(float)); l.outputs = h*w*n*(classes + coords + 1); l.inputs = l.outputs; - l.max_boxes = max_boxes; + l.max_boxes = max_boxes; l.truths = max_boxes*(5); l.delta = calloc(batch*l.outputs, sizeof(float)); l.output = calloc(batch*l.outputs, sizeof(float)); @@ -53,8 +53,8 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int void resize_region_layer(layer *l, int w, int h) { - int old_w = l->w; - int old_h = l->h; + int old_w = l->w; + int old_h = l->h; l->w = w; l->h = h; @@ -65,13 +65,13 @@ void resize_region_layer(layer *l, int w, int h) l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); #ifdef GPU - if (old_w < w || old_h < h) { - cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); + if (old_w < w || old_h < h) { + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); - l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); - l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); - } + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } #endif } @@ -127,34 +127,34 @@ void delta_region_class(float *output, float *delta, int index, int class_id, in class_id = hier->parent[class_id]; } *avg_cat += pred; - } else { - // Focal loss - if (focal_loss) { - // Focal Loss - float alpha = 0.5; // 0.25 or 0.5 - //float gamma = 2; // hardcoded in many places of the grad-formula + } else { + // Focal loss + if (focal_loss) { + // Focal Loss + float alpha = 0.5; // 0.25 or 0.5 + //float gamma = 2; // hardcoded in many places of the grad-formula - int ti = index + class_id; - float pt = output[ti] + 0.000000000000001F; - // http://fooplot.com/#W3sidHlwZSI6MCwiZXEiOiItKDEteCkqKDIqeCpsb2coeCkreC0xKSIsImNvbG9yIjoiIzAwMDAwMCJ9LHsidHlwZSI6MTAwMH1d - float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1); // http://blog.csdn.net/linmingan/article/details/77885832 - //float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1); // https://github.com/unsky/focal-loss + int ti = index + class_id; + float pt = output[ti] + 0.000000000000001F; + // http://fooplot.com/#W3sidHlwZSI6MCwiZXEiOiItKDEteCkqKDIqeCpsb2coeCkreC0xKSIsImNvbG9yIjoiIzAwMDAwMCJ9LHsidHlwZSI6MTAwMH1d + float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1); // http://blog.csdn.net/linmingan/article/details/77885832 + //float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1); // https://github.com/unsky/focal-loss - for (n = 0; n < classes; ++n) { - delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]); + for (n = 0; n < classes; ++n) { + delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]); - delta[index + n] *= alpha*grad; + delta[index + n] *= alpha*grad; - if (n == class_id) *avg_cat += output[index + n]; - } - } - else { - // default - for (n = 0; n < classes; ++n) { - delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]); - if (n == class_id) *avg_cat += output[index + n]; - } - } + if (n == class_id) *avg_cat += output[index + n]; + } + } + else { + // default + for (n = 0; n < classes; ++n) { + delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]); + if (n == class_id) *avg_cat += output[index + n]; + } + } } } @@ -170,9 +170,9 @@ float tisnan(float x) static int entry_index(layer l, int batch, int location, int entry) { - int n = location / (l.w*l.h); - int loc = location % (l.w*l.h); - return batch*l.outputs + n*l.w*l.h*(l.coords + l.classes + 1) + entry*l.w*l.h + loc; + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(l.coords + l.classes + 1) + entry*l.w*l.h + loc; } void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output); @@ -256,8 +256,8 @@ void forward_region_layer(const region_layer l, network_state state) int best_class_id = -1; for(t = 0; t < l.max_boxes; ++t){ box truth = float_to_box(state.truth + t*5 + b*l.truths); - int class_id = state.truth[t * 5 + b*l.truths + 4]; - if (class_id >= l.classes) continue; // if label contains class_id more than number of classes in the cfg-file + int class_id = state.truth[t * 5 + b*l.truths + 4]; + if (class_id >= l.classes) continue; // if label contains class_id more than number of classes in the cfg-file if(!truth.x) break; float iou = box_iou(pred, truth); if (iou > best_iou) { @@ -295,12 +295,12 @@ void forward_region_layer(const region_layer l, network_state state) } for(t = 0; t < l.max_boxes; ++t){ box truth = float_to_box(state.truth + t*5 + b*l.truths); - int class_id = state.truth[t * 5 + b*l.truths + 4]; - if (class_id >= l.classes) { - printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1); - getchar(); - continue; // if label contains class_id more than number of classes in the cfg-file - } + int class_id = state.truth[t * 5 + b*l.truths + 4]; + if (class_id >= l.classes) { + printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1); + getchar(); + continue; // if label contains class_id more than number of classes in the cfg-file + } if(!truth.x) break; float best_iou = 0; @@ -450,7 +450,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state) cuda_pull_array(state.truth, truth_cpu, num_truth); } cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs); - //cudaStreamSynchronize(get_cuda_stream()); + //cudaStreamSynchronize(get_cuda_stream()); network_state cpu_state = state; cpu_state.train = state.train; cpu_state.truth = truth_cpu; @@ -460,7 +460,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state) free(cpu_state.input); if(!state.train) return; cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); - //cudaStreamSynchronize(get_cuda_stream()); + //cudaStreamSynchronize(get_cuda_stream()); if(cpu_state.truth) free(cpu_state.truth); } @@ -473,107 +473,107 @@ void backward_region_layer_gpu(region_layer l, network_state state) void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) { - int i; - int new_w = 0; - int new_h = 0; - if (((float)netw / w) < ((float)neth / h)) { - new_w = netw; - new_h = (h * netw) / w; - } - else { - new_h = neth; - new_w = (w * neth) / h; - } - for (i = 0; i < n; ++i) { - box b = dets[i].bbox; - b.x = (b.x - (netw - new_w) / 2. / netw) / ((float)new_w / netw); - b.y = (b.y - (neth - new_h) / 2. / neth) / ((float)new_h / neth); - b.w *= (float)netw / new_w; - b.h *= (float)neth / new_h; - if (!relative) { - b.x *= w; - b.w *= w; - b.y *= h; - b.h *= h; - } - dets[i].bbox = b; - } + int i; + int new_w = 0; + int new_h = 0; + if (((float)netw / w) < ((float)neth / h)) { + new_w = netw; + new_h = (h * netw) / w; + } + else { + new_h = neth; + new_w = (w * neth) / h; + } + for (i = 0; i < n; ++i) { + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w) / 2. / netw) / ((float)new_w / netw); + b.y = (b.y - (neth - new_h) / 2. / neth) / ((float)new_h / neth); + b.w *= (float)netw / new_w; + b.h *= (float)neth / new_h; + if (!relative) { + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } } void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) { - int i, j, n, z; - float *predictions = l.output; - if (l.batch == 2) { - float *flip = l.output + l.outputs; - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w / 2; ++i) { - for (n = 0; n < l.n; ++n) { - for (z = 0; z < l.classes + l.coords + 1; ++z) { - int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; - int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); - float swap = flip[i1]; - flip[i1] = flip[i2]; - flip[i2] = swap; - if (z == 0) { - flip[i1] = -flip[i1]; - flip[i2] = -flip[i2]; - } - } - } - } - } - for (i = 0; i < l.outputs; ++i) { - l.output[i] = (l.output[i] + flip[i]) / 2.; - } - } - for (i = 0; i < l.w*l.h; ++i) { - int row = i / l.w; - int col = i % l.w; - for (n = 0; n < l.n; ++n) { - int index = n*l.w*l.h + i; - for (j = 0; j < l.classes; ++j) { - dets[index].prob[j] = 0; - } - int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); - int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); - int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); - float scale = l.background ? 1 : predictions[obj_index]; - dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h);// , l.w*l.h); - dets[index].objectness = scale > thresh ? scale : 0; - if (dets[index].mask) { - for (j = 0; j < l.coords - 4; ++j) { - dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; - } - } + int i, j, n, z; + float *predictions = l.output; + if (l.batch == 2) { + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w / 2; ++i) { + for (n = 0; n < l.n; ++n) { + for (z = 0; z < l.classes + l.coords + 1; ++z) { + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if (z == 0) { + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for (i = 0; i < l.outputs; ++i) { + l.output[i] = (l.output[i] + flip[i]) / 2.; + } + } + for (i = 0; i < l.w*l.h; ++i) { + int row = i / l.w; + int col = i % l.w; + for (n = 0; n < l.n; ++n) { + int index = n*l.w*l.h + i; + for (j = 0; j < l.classes; ++j) { + dets[index].prob[j] = 0; + } + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float scale = l.background ? 1 : predictions[obj_index]; + dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h);// , l.w*l.h); + dets[index].objectness = scale > thresh ? scale : 0; + if (dets[index].mask) { + for (j = 0; j < l.coords - 4; ++j) { + dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; + } + } - int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); - if (l.softmax_tree) { + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); + if (l.softmax_tree) { - hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);// , l.w*l.h); - if (map) { - for (j = 0; j < 200; ++j) { - int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); - float prob = scale*predictions[class_index]; - dets[index].prob[j] = (prob > thresh) ? prob : 0; - } - } - else { - int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); - dets[index].prob[j] = (scale > thresh) ? scale : 0; - } - } - else { - if (dets[index].objectness) { - for (j = 0; j < l.classes; ++j) { - int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); - float prob = scale*predictions[class_index]; - dets[index].prob[j] = (prob > thresh) ? prob : 0; - } - } - } - } - } - correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); + hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);// , l.w*l.h); + if (map) { + for (j = 0; j < 200; ++j) { + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + else { + int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); + dets[index].prob[j] = (scale > thresh) ? scale : 0; + } + } + else { + if (dets[index].objectness) { + for (j = 0; j < l.classes; ++j) { + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + } + } + } + correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); } diff --git a/src/reorg_layer.c b/src/reorg_layer.c index 05bab1ea..c298b400 100644 --- a/src/reorg_layer.c +++ b/src/reorg_layer.c @@ -77,42 +77,42 @@ void resize_reorg_layer(layer *l, int w, int h) void forward_reorg_layer(const layer l, network_state state) { - if (l.reverse) { - reorg_cpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, l.output); - } - else { - reorg_cpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, l.output); - } + if (l.reverse) { + reorg_cpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, l.output); + } + else { + reorg_cpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, l.output); + } } void backward_reorg_layer(const layer l, network_state state) { - if (l.reverse) { - reorg_cpu(l.delta, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, state.delta); - } - else { - reorg_cpu(l.delta, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, state.delta); - } + if (l.reverse) { + reorg_cpu(l.delta, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, state.delta); + } + else { + reorg_cpu(l.delta, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, state.delta); + } } #ifdef GPU void forward_reorg_layer_gpu(layer l, network_state state) { - if (l.reverse) { - reorg_ongpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, l.output_gpu); - } - else { - reorg_ongpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, l.output_gpu); - } + if (l.reverse) { + reorg_ongpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, l.output_gpu); + } + else { + reorg_ongpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, l.output_gpu); + } } void backward_reorg_layer_gpu(layer l, network_state state) { - if (l.reverse) { - reorg_ongpu(l.delta_gpu, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, state.delta); - } - else { - reorg_ongpu(l.delta_gpu, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, state.delta); - } + if (l.reverse) { + reorg_ongpu(l.delta_gpu, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, state.delta); + } + else { + reorg_ongpu(l.delta_gpu, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, state.delta); + } } #endif diff --git a/src/reorg_old_layer.c b/src/reorg_old_layer.c index c55cf7c2..30206d9c 100644 --- a/src/reorg_old_layer.c +++ b/src/reorg_old_layer.c @@ -77,42 +77,42 @@ void resize_reorg_old_layer(layer *l, int w, int h) void forward_reorg_old_layer(const layer l, network_state state) { - if (l.reverse) { - reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); - } - else { - reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); - } + if (l.reverse) { + reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); + } + else { + reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); + } } void backward_reorg_old_layer(const layer l, network_state state) { - if (l.reverse) { - reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); - } - else { - reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); - } + if (l.reverse) { + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); + } + else { + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); + } } #ifdef GPU void forward_reorg_old_layer_gpu(layer l, network_state state) { - if (l.reverse) { - reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); - } - else { - reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); - } + if (l.reverse) { + reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); + } + else { + reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); + } } void backward_reorg_old_layer_gpu(layer l, network_state state) { - if (l.reverse) { - reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); - } - else { - reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); - } + if (l.reverse) { + reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); + } + else { + reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); + } } #endif diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index 87b4b089..9fa18db8 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -38,20 +38,20 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int void resize_shortcut_layer(layer *l, int w, int h) { - //assert(l->w == l->out_w); - //assert(l->h == l->out_h); - l->w = l->out_w = w; - l->h = l->out_h = h; - l->outputs = w*h*l->out_c; - l->inputs = l->outputs; - l->delta = realloc(l->delta, l->outputs*l->batch * sizeof(float)); - l->output = realloc(l->output, l->outputs*l->batch * sizeof(float)); + //assert(l->w == l->out_w); + //assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch * sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch * sizeof(float)); #ifdef GPU - cuda_free(l->output_gpu); - cuda_free(l->delta_gpu); - l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); - l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); #endif } diff --git a/src/tree.c b/src/tree.c index 35ac3de1..d66da9f8 100644 --- a/src/tree.c +++ b/src/tree.c @@ -52,34 +52,34 @@ void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leave int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) { - float p = 1; - int group = 0; - int i; - while (1) { - float max = 0; - int max_i = 0; + float p = 1; + int group = 0; + int i; + while (1) { + float max = 0; + int max_i = 0; - for (i = 0; i < hier->group_size[group]; ++i) { - int index = i + hier->group_offset[group]; - float val = predictions[(i + hier->group_offset[group])*stride]; - if (val > max) { - max_i = index; - max = val; - } - } - if (p*max > thresh) { - p = p*max; - group = hier->child[max_i]; - if (hier->child[max_i] < 0) return max_i; - } - else if (group == 0) { - return max_i; - } - else { - return hier->parent[hier->group_offset[group]]; - } - } - return 0; + for (i = 0; i < hier->group_size[group]; ++i) { + int index = i + hier->group_offset[group]; + float val = predictions[(i + hier->group_offset[group])*stride]; + if (val > max) { + max_i = index; + max = val; + } + } + if (p*max > thresh) { + p = p*max; + group = hier->child[max_i]; + if (hier->child[max_i] < 0) return max_i; + } + else if (group == 0) { + return max_i; + } + else { + return hier->parent[hier->group_offset[group]]; + } + } + return 0; } tree *read_tree(char *filename) diff --git a/src/utils.c b/src/utils.c index 5dca2123..3af7c970 100644 --- a/src/utils.c +++ b/src/utils.c @@ -18,11 +18,11 @@ double what_time_is_it_now() { - struct timeval time; - if (gettimeofday(&time, NULL)) { - return 0; - } - return (double)time.tv_sec + (double)time.tv_usec * .000001; + struct timeval time; + if (gettimeofday(&time, NULL)) { + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; } int *read_map(char *filename) @@ -57,7 +57,7 @@ void shuffle(void *arr, size_t n, size_t size) void *swp = calloc(1, size); for(i = 0; i < n-1; ++i){ size_t j = i + rand()/(RAND_MAX / (n-i)+1); - memcpy(swp, (char*)arr+(j*size), size); + memcpy(swp, (char*)arr+(j*size), size); memcpy((char*)arr+(j*size), (char*)arr+(i*size), size); memcpy((char*)arr+(i*size), swp, size); } @@ -137,7 +137,7 @@ char *basecfg(char *cfgfile) { c = next+1; } - if(!next) while ((next = strchr(c, '\\'))) { c = next + 1; } + if(!next) while ((next = strchr(c, '\\'))) { c = next + 1; } c = copy_string(c); next = strchr(c, '.'); if (next) *next = 0; @@ -169,63 +169,63 @@ void pm(int M, int N, float *A) void find_replace(char *str, char *orig, char *rep, char *output) { - char *buffer = calloc(8192, sizeof(char)); + char *buffer = calloc(8192, sizeof(char)); char *p; sprintf(buffer, "%s", str); if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? sprintf(output, "%s", str); - free(buffer); + free(buffer); return; } *p = '\0'; sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); - free(buffer); + free(buffer); } void find_replace_extension(char *str, char *orig, char *rep, char *output) { - char *buffer = calloc(8192, sizeof(char)); + char *buffer = calloc(8192, sizeof(char)); - sprintf(buffer, "%s", str); - char *p = strstr(buffer, orig); - int offset = (p - buffer); - int chars_from_end = strlen(buffer) - offset; - if (!p || chars_from_end != strlen(orig)) { // Is 'orig' even in 'str' AND is 'orig' found at the end of 'str'? - sprintf(output, "%s", str); - free(buffer); - return; - } + sprintf(buffer, "%s", str); + char *p = strstr(buffer, orig); + int offset = (p - buffer); + int chars_from_end = strlen(buffer) - offset; + if (!p || chars_from_end != strlen(orig)) { // Is 'orig' even in 'str' AND is 'orig' found at the end of 'str'? + sprintf(output, "%s", str); + free(buffer); + return; + } - *p = '\0'; + *p = '\0'; - sprintf(output, "%s%s%s", buffer, rep, p + strlen(orig)); - free(buffer); + sprintf(output, "%s%s%s", buffer, rep, p + strlen(orig)); + free(buffer); } void replace_image_to_label(char *input_path, char *output_path) { - //find_replace(input_path, "/images/", "/labels/", output_path); // COCO - find_replace(input_path, "/images/train2014/", "/labels/train2014/", output_path); // COCO - find_replace(output_path, "/images/val2014/", "/labels/val2014/", output_path); // COCO - find_replace(output_path, "/JPEGImages/", "/labels/", output_path); // PascalVOC - //find_replace(output_path, "/VOC2007/JPEGImages/", "/VOC2007/labels/", output_path); // PascalVOC - //find_replace(output_path, "/VOC2012/JPEGImages/", "/VOC2012/labels/", output_path); // PascalVOC + //find_replace(input_path, "/images/", "/labels/", output_path); // COCO + find_replace(input_path, "/images/train2014/", "/labels/train2014/", output_path); // COCO + find_replace(output_path, "/images/val2014/", "/labels/val2014/", output_path); // COCO + find_replace(output_path, "/JPEGImages/", "/labels/", output_path); // PascalVOC + //find_replace(output_path, "/VOC2007/JPEGImages/", "/VOC2007/labels/", output_path); // PascalVOC + //find_replace(output_path, "/VOC2012/JPEGImages/", "/VOC2012/labels/", output_path); // PascalVOC - //find_replace(output_path, "/raw/", "/labels/", output_path); + //find_replace(output_path, "/raw/", "/labels/", output_path); - // replace only ext of files - find_replace_extension(output_path, ".jpg", ".txt", output_path); - find_replace_extension(output_path, ".JPG", ".txt", output_path); // error - find_replace_extension(output_path, ".jpeg", ".txt", output_path); - find_replace_extension(output_path, ".JPEG", ".txt", output_path); - find_replace_extension(output_path, ".png", ".txt", output_path); - find_replace_extension(output_path, ".PNG", ".txt", output_path); - find_replace_extension(output_path, ".bmp", ".txt", output_path); - find_replace_extension(output_path, ".BMP", ".txt", output_path); - find_replace_extension(output_path, ".ppm", ".txt", output_path); - find_replace_extension(output_path, ".PPM", ".txt", output_path); + // replace only ext of files + find_replace_extension(output_path, ".jpg", ".txt", output_path); + find_replace_extension(output_path, ".JPG", ".txt", output_path); // error + find_replace_extension(output_path, ".jpeg", ".txt", output_path); + find_replace_extension(output_path, ".JPEG", ".txt", output_path); + find_replace_extension(output_path, ".png", ".txt", output_path); + find_replace_extension(output_path, ".PNG", ".txt", output_path); + find_replace_extension(output_path, ".bmp", ".txt", output_path); + find_replace_extension(output_path, ".BMP", ".txt", output_path); + find_replace_extension(output_path, ".ppm", ".txt", output_path); + find_replace_extension(output_path, ".PPM", ".txt", output_path); } float sec(clock_t clocks) @@ -299,15 +299,15 @@ void strip(char *s) void strip_args(char *s) { - size_t i; - size_t len = strlen(s); - size_t offset = 0; - for (i = 0; i < len; ++i) { - char c = s[i]; - if (c == '\t' || c == '\n' || c == '\r' || c == 0x0d || c == 0x0a) ++offset; - else s[i - offset] = c; - } - s[len - offset] = '\0'; + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for (i = 0; i < len; ++i) { + char c = s[i]; + if (c == '\t' || c == '\n' || c == '\r' || c == 0x0d || c == 0x0a) ++offset; + else s[i - offset] = c; + } + s[len - offset] = '\0'; } void strip_char(char *s, char bad) @@ -356,11 +356,11 @@ char *fgetl(FILE *fp) fgets(&line[curr], readsize, fp); curr = strlen(line); } - if(curr >= 2) - if(line[curr-2] == 0x0d) line[curr-2] = 0x00; + if(curr >= 2) + if(line[curr-2] == 0x0d) line[curr-2] = 0x00; - if(curr >= 1) - if(line[curr-1] == 0x0a) line[curr-1] = 0x00; + if(curr >= 1) + if(line[curr-1] == 0x0a) line[curr-1] = 0x00; return line; } @@ -620,11 +620,11 @@ int max_index(float *a, int n) int int_index(int *a, int val, int n) { - int i; - for (i = 0; i < n; ++i) { - if (a[i] == val) return i; - } - return -1; + int i; + for (i = 0; i < n; ++i) { + if (a[i] == val) return i; + } + return -1; } int rand_int(int min, int max) @@ -691,7 +691,7 @@ float rand_uniform(float min, float max) max = swap; } return ((float)rand()/RAND_MAX * (max - min)) + min; - //return (random_float() * (max - min)) + min; + //return (random_float() * (max - min)) + min; } float rand_scale(float s) @@ -715,30 +715,30 @@ float **one_hot_encode(float *a, int n, int k) unsigned int random_gen() { - unsigned int rnd = 0; + unsigned int rnd = 0; #ifdef WIN32 - rand_s(&rnd); + rand_s(&rnd); #else - rnd = rand(); + rnd = rand(); #endif - return rnd; + return rnd; } float random_float() { #ifdef WIN32 - return ((float)random_gen() / (float)UINT_MAX); + return ((float)random_gen() / (float)UINT_MAX); #else - return ((float)random_gen() / (float)RAND_MAX); + return ((float)random_gen() / (float)RAND_MAX); #endif } float rand_uniform_strong(float min, float max) { - if (max < min) { - float swap = min; - min = max; - max = swap; - } - return (random_float() * (max - min)) + min; + if (max < min) { + float swap = min; + min = max; + max = swap; + } + return (random_float() * (max - min)) + min; } \ No newline at end of file diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index 2eca2678..a724b3e5 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -21,10 +21,10 @@ //#pragma comment(lib, "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1/lib/x64/cudart.lib") //static std::shared_ptr device_ptr(NULL, [](void *img) { cudaDeviceReset(); }); -#include "yolo_v2_class.hpp" // imported functions from DLL +#include "yolo_v2_class.hpp" // imported functions from DLL #ifdef OPENCV -#include // C++ +#include // C++ #include "opencv2/core/version.hpp" #ifndef CV_VERSION_EPOCH #include "opencv2/videoio/videoio.hpp" @@ -36,67 +36,67 @@ #pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib") #pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib") #pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib") -#endif // TRACK_OPTFLOW +#endif // TRACK_OPTFLOW #else #define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)""CVAUX_STR(CV_VERSION_MAJOR)""CVAUX_STR(CV_VERSION_MINOR) #pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib") #pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib") #pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib") -#endif // CV_VERSION_EPOCH +#endif // CV_VERSION_EPOCH class track_kalman { public: - cv::KalmanFilter kf; - int state_size, meas_size, contr_size; + cv::KalmanFilter kf; + int state_size, meas_size, contr_size; - track_kalman(int _state_size = 10, int _meas_size = 10, int _contr_size = 0) - : state_size(_state_size), meas_size(_meas_size), contr_size(_contr_size) - { - kf.init(state_size, meas_size, contr_size, CV_32F); + track_kalman(int _state_size = 10, int _meas_size = 10, int _contr_size = 0) + : state_size(_state_size), meas_size(_meas_size), contr_size(_contr_size) + { + kf.init(state_size, meas_size, contr_size, CV_32F); - cv::setIdentity(kf.measurementMatrix); - cv::setIdentity(kf.measurementNoiseCov, cv::Scalar::all(1e-1)); - cv::setIdentity(kf.processNoiseCov, cv::Scalar::all(1e-5)); - cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); - cv::setIdentity(kf.transitionMatrix); - } + cv::setIdentity(kf.measurementMatrix); + cv::setIdentity(kf.measurementNoiseCov, cv::Scalar::all(1e-1)); + cv::setIdentity(kf.processNoiseCov, cv::Scalar::all(1e-5)); + cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); + cv::setIdentity(kf.transitionMatrix); + } - void set(std::vector result_vec) { - for (size_t i = 0; i < result_vec.size() && i < state_size*2; ++i) { - kf.statePost.at(i * 2 + 0) = result_vec[i].x; - kf.statePost.at(i * 2 + 1) = result_vec[i].y; - } - } + void set(std::vector result_vec) { + for (size_t i = 0; i < result_vec.size() && i < state_size*2; ++i) { + kf.statePost.at(i * 2 + 0) = result_vec[i].x; + kf.statePost.at(i * 2 + 1) = result_vec[i].y; + } + } - // Kalman.correct() calculates: statePost = statePre + gain * (z(k)-measurementMatrix*statePre); - // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) - std::vector correct(std::vector result_vec) { - cv::Mat measurement(meas_size, 1, CV_32F); - for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { - measurement.at(i * 2 + 0) = result_vec[i].x; - measurement.at(i * 2 + 1) = result_vec[i].y; - } - cv::Mat estimated = kf.correct(measurement); - for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { - result_vec[i].x = estimated.at(i * 2 + 0); - result_vec[i].y = estimated.at(i * 2 + 1); - } - return result_vec; - } + // Kalman.correct() calculates: statePost = statePre + gain * (z(k)-measurementMatrix*statePre); + // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) + std::vector correct(std::vector result_vec) { + cv::Mat measurement(meas_size, 1, CV_32F); + for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { + measurement.at(i * 2 + 0) = result_vec[i].x; + measurement.at(i * 2 + 1) = result_vec[i].y; + } + cv::Mat estimated = kf.correct(measurement); + for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { + result_vec[i].x = estimated.at(i * 2 + 0); + result_vec[i].y = estimated.at(i * 2 + 1); + } + return result_vec; + } - // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; - // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) - std::vector predict() { - std::vector result_vec; - cv::Mat control; - cv::Mat prediction = kf.predict(control); - for (size_t i = 0; i < prediction.rows && i < state_size * 2; ++i) { - result_vec[i].x = prediction.at(i * 2 + 0); - result_vec[i].y = prediction.at(i * 2 + 1); - } - return result_vec; - } + // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; + // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) + std::vector predict() { + std::vector result_vec; + cv::Mat control; + cv::Mat prediction = kf.predict(control); + for (size_t i = 0; i < prediction.rows && i < state_size * 2; ++i) { + result_vec[i].x = prediction.at(i * 2 + 0); + result_vec[i].y = prediction.at(i * 2 + 1); + } + return result_vec; + } }; @@ -105,384 +105,384 @@ public: class extrapolate_coords_t { public: - std::vector old_result_vec; - std::vector dx_vec, dy_vec, time_vec; - std::vector old_dx_vec, old_dy_vec; + std::vector old_result_vec; + std::vector dx_vec, dy_vec, time_vec; + std::vector old_dx_vec, old_dy_vec; - void new_result(std::vector new_result_vec, float new_time) { - old_dx_vec = dx_vec; - old_dy_vec = dy_vec; - if (old_dx_vec.size() != old_result_vec.size()) std::cout << "old_dx != old_res \n"; - dx_vec = std::vector(new_result_vec.size(), 0); - dy_vec = std::vector(new_result_vec.size(), 0); - update_result(new_result_vec, new_time, false); - old_result_vec = new_result_vec; - time_vec = std::vector(new_result_vec.size(), new_time); - } + void new_result(std::vector new_result_vec, float new_time) { + old_dx_vec = dx_vec; + old_dy_vec = dy_vec; + if (old_dx_vec.size() != old_result_vec.size()) std::cout << "old_dx != old_res \n"; + dx_vec = std::vector(new_result_vec.size(), 0); + dy_vec = std::vector(new_result_vec.size(), 0); + update_result(new_result_vec, new_time, false); + old_result_vec = new_result_vec; + time_vec = std::vector(new_result_vec.size(), new_time); + } - void update_result(std::vector new_result_vec, float new_time, bool update = true) { - for (size_t i = 0; i < new_result_vec.size(); ++i) { - for (size_t k = 0; k < old_result_vec.size(); ++k) { - if (old_result_vec[k].track_id == new_result_vec[i].track_id && old_result_vec[k].obj_id == new_result_vec[i].obj_id) { - float const delta_time = new_time - time_vec[k]; - if (abs(delta_time) < 1) break; - size_t index = (update) ? k : i; - float dx = ((float)new_result_vec[i].x - (float)old_result_vec[k].x) / delta_time; - float dy = ((float)new_result_vec[i].y - (float)old_result_vec[k].y) / delta_time; - float old_dx = dx, old_dy = dy; + void update_result(std::vector new_result_vec, float new_time, bool update = true) { + for (size_t i = 0; i < new_result_vec.size(); ++i) { + for (size_t k = 0; k < old_result_vec.size(); ++k) { + if (old_result_vec[k].track_id == new_result_vec[i].track_id && old_result_vec[k].obj_id == new_result_vec[i].obj_id) { + float const delta_time = new_time - time_vec[k]; + if (abs(delta_time) < 1) break; + size_t index = (update) ? k : i; + float dx = ((float)new_result_vec[i].x - (float)old_result_vec[k].x) / delta_time; + float dy = ((float)new_result_vec[i].y - (float)old_result_vec[k].y) / delta_time; + float old_dx = dx, old_dy = dy; - // if it's shaking - if (update) { - if (dx * dx_vec[i] < 0) dx = dx / 2; - if (dy * dy_vec[i] < 0) dy = dy / 2; - } else { - if (dx * old_dx_vec[k] < 0) dx = dx / 2; - if (dy * old_dy_vec[k] < 0) dy = dy / 2; - } - dx_vec[index] = dx; - dy_vec[index] = dy; + // if it's shaking + if (update) { + if (dx * dx_vec[i] < 0) dx = dx / 2; + if (dy * dy_vec[i] < 0) dy = dy / 2; + } else { + if (dx * old_dx_vec[k] < 0) dx = dx / 2; + if (dy * old_dy_vec[k] < 0) dy = dy / 2; + } + dx_vec[index] = dx; + dy_vec[index] = dy; - //if (old_dx == dx && old_dy == dy) std::cout << "not shakin \n"; - //else std::cout << "shakin \n"; + //if (old_dx == dx && old_dy == dy) std::cout << "not shakin \n"; + //else std::cout << "shakin \n"; - if (dx_vec[index] > 1000 || dy_vec[index] > 1000) { - //std::cout << "!!! bad dx or dy, dx = " << dx_vec[index] << ", dy = " << dy_vec[index] << - // ", delta_time = " << delta_time << ", update = " << update << std::endl; - dx_vec[index] = 0; - dy_vec[index] = 0; - } - old_result_vec[k].x = new_result_vec[i].x; - old_result_vec[k].y = new_result_vec[i].y; - time_vec[k] = new_time; - break; - } - } - } - } + if (dx_vec[index] > 1000 || dy_vec[index] > 1000) { + //std::cout << "!!! bad dx or dy, dx = " << dx_vec[index] << ", dy = " << dy_vec[index] << + // ", delta_time = " << delta_time << ", update = " << update << std::endl; + dx_vec[index] = 0; + dy_vec[index] = 0; + } + old_result_vec[k].x = new_result_vec[i].x; + old_result_vec[k].y = new_result_vec[i].y; + time_vec[k] = new_time; + break; + } + } + } + } - std::vector predict(float cur_time) { - std::vector result_vec = old_result_vec; - for (size_t i = 0; i < old_result_vec.size(); ++i) { - float const delta_time = cur_time - time_vec[i]; - auto &bbox = result_vec[i]; - float new_x = (float) bbox.x + dx_vec[i] * delta_time; - float new_y = (float) bbox.y + dy_vec[i] * delta_time; - if (new_x > 0) bbox.x = new_x; - else bbox.x = 0; - if (new_y > 0) bbox.y = new_y; - else bbox.y = 0; - } - return result_vec; - } + std::vector predict(float cur_time) { + std::vector result_vec = old_result_vec; + for (size_t i = 0; i < old_result_vec.size(); ++i) { + float const delta_time = cur_time - time_vec[i]; + auto &bbox = result_vec[i]; + float new_x = (float) bbox.x + dx_vec[i] * delta_time; + float new_y = (float) bbox.y + dy_vec[i] * delta_time; + if (new_x > 0) bbox.x = new_x; + else bbox.x = 0; + if (new_y > 0) bbox.y = new_y; + else bbox.y = 0; + } + return result_vec; + } }; void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector obj_names, - int current_det_fps = -1, int current_cap_fps = -1) + int current_det_fps = -1, int current_cap_fps = -1) { - int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } }; + int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } }; - for (auto &i : result_vec) { - cv::Scalar color = obj_id_to_color(i.obj_id); - cv::rectangle(mat_img, cv::Rect(i.x, i.y, i.w, i.h), color, 2); - if (obj_names.size() > i.obj_id) { - std::string obj_name = obj_names[i.obj_id]; - if (i.track_id > 0) obj_name += " - " + std::to_string(i.track_id); - cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0); - int const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); - cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 30, 0)), - cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)), - color, CV_FILLED, 8, 0); - putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); - } - } - if (current_det_fps >= 0 && current_cap_fps >= 0) { - std::string fps_str = "FPS detection: " + std::to_string(current_det_fps) + " FPS capture: " + std::to_string(current_cap_fps); - putText(mat_img, fps_str, cv::Point2f(10, 20), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(50, 255, 0), 2); - } + for (auto &i : result_vec) { + cv::Scalar color = obj_id_to_color(i.obj_id); + cv::rectangle(mat_img, cv::Rect(i.x, i.y, i.w, i.h), color, 2); + if (obj_names.size() > i.obj_id) { + std::string obj_name = obj_names[i.obj_id]; + if (i.track_id > 0) obj_name += " - " + std::to_string(i.track_id); + cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0); + int const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); + cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 30, 0)), + cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)), + color, CV_FILLED, 8, 0); + putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); + } + } + if (current_det_fps >= 0 && current_cap_fps >= 0) { + std::string fps_str = "FPS detection: " + std::to_string(current_det_fps) + " FPS capture: " + std::to_string(current_cap_fps); + putText(mat_img, fps_str, cv::Point2f(10, 20), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(50, 255, 0), 2); + } } -#endif // OPENCV +#endif // OPENCV void show_console_result(std::vector const result_vec, std::vector const obj_names) { - for (auto &i : result_vec) { - if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - "; - std::cout << "obj_id = " << i.obj_id << ", x = " << i.x << ", y = " << i.y - << ", w = " << i.w << ", h = " << i.h - << std::setprecision(3) << ", prob = " << i.prob << std::endl; - } + for (auto &i : result_vec) { + if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - "; + std::cout << "obj_id = " << i.obj_id << ", x = " << i.x << ", y = " << i.y + << ", w = " << i.w << ", h = " << i.h + << std::setprecision(3) << ", prob = " << i.prob << std::endl; + } } std::vector objects_names_from_file(std::string const filename) { - std::ifstream file(filename); - std::vector file_lines; - if (!file.is_open()) return file_lines; - for(std::string line; getline(file, line);) file_lines.push_back(line); - std::cout << "object names loaded \n"; - return file_lines; + std::ifstream file(filename); + std::vector file_lines; + if (!file.is_open()) return file_lines; + for(std::string line; getline(file, line);) file_lines.push_back(line); + std::cout << "object names loaded \n"; + return file_lines; } int main(int argc, char *argv[]) { - std::string names_file = "data/coco.names"; - std::string cfg_file = "cfg/yolov3.cfg"; - std::string weights_file = "yolov3.weights"; - std::string filename; + std::string names_file = "data/coco.names"; + std::string cfg_file = "cfg/yolov3.cfg"; + std::string weights_file = "yolov3.weights"; + std::string filename; - if (argc > 4) { //voc.names yolo-voc.cfg yolo-voc.weights test.mp4 - names_file = argv[1]; - cfg_file = argv[2]; - weights_file = argv[3]; - filename = argv[4]; - } - else if (argc > 1) filename = argv[1]; + if (argc > 4) { //voc.names yolo-voc.cfg yolo-voc.weights test.mp4 + names_file = argv[1]; + cfg_file = argv[2]; + weights_file = argv[3]; + filename = argv[4]; + } + else if (argc > 1) filename = argv[1]; - float const thresh = (argc > 5) ? std::stof(argv[5]) : 0.20; + float const thresh = (argc > 5) ? std::stof(argv[5]) : 0.20; - Detector detector(cfg_file, weights_file); + Detector detector(cfg_file, weights_file); - auto obj_names = objects_names_from_file(names_file); - std::string out_videofile = "result.avi"; - bool const save_output_videofile = true; + auto obj_names = objects_names_from_file(names_file); + std::string out_videofile = "result.avi"; + bool const save_output_videofile = true; #ifdef TRACK_OPTFLOW - Tracker_optflow tracker_flow; - detector.wait_stream = true; + Tracker_optflow tracker_flow; + detector.wait_stream = true; #endif - while (true) - { - std::cout << "input image or video filename: "; - if(filename.size() == 0) std::cin >> filename; - if (filename.size() == 0) break; - - try { + while (true) + { + std::cout << "input image or video filename: "; + if(filename.size() == 0) std::cin >> filename; + if (filename.size() == 0) break; + + try { #ifdef OPENCV - extrapolate_coords_t extrapolate_coords; - bool extrapolate_flag = false; - float cur_time_extrapolate = 0, old_time_extrapolate = 0; - preview_boxes_t large_preview(100, 150, false), small_preview(50, 50, true); - bool show_small_boxes = false; + extrapolate_coords_t extrapolate_coords; + bool extrapolate_flag = false; + float cur_time_extrapolate = 0, old_time_extrapolate = 0; + preview_boxes_t large_preview(100, 150, false), small_preview(50, 50, true); + bool show_small_boxes = false; - std::string const file_ext = filename.substr(filename.find_last_of(".") + 1); - std::string const protocol = filename.substr(0, 7); - if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || // video file - protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/") // video network stream - { - cv::Mat cap_frame, cur_frame, det_frame, write_frame; - std::queue track_optflow_queue; - int passed_flow_frames = 0; - std::shared_ptr det_image; - std::vector result_vec, thread_result_vec; - detector.nms = 0.02; // comment it - if track_id is not required - std::atomic consumed, videowrite_ready; - bool exit_flag = false; - consumed = true; - videowrite_ready = true; - std::atomic fps_det_counter, fps_cap_counter; - fps_det_counter = 0; - fps_cap_counter = 0; - int current_det_fps = 0, current_cap_fps = 0; - std::thread t_detect, t_cap, t_videowrite; - std::mutex mtx; - std::condition_variable cv_detected, cv_pre_tracked; - std::chrono::steady_clock::time_point steady_start, steady_end; - cv::VideoCapture cap(filename); cap >> cur_frame; - int const video_fps = cap.get(CV_CAP_PROP_FPS); - cv::Size const frame_size = cur_frame.size(); - cv::VideoWriter output_video; - if (save_output_videofile) - output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true); + std::string const file_ext = filename.substr(filename.find_last_of(".") + 1); + std::string const protocol = filename.substr(0, 7); + if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || // video file + protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/") // video network stream + { + cv::Mat cap_frame, cur_frame, det_frame, write_frame; + std::queue track_optflow_queue; + int passed_flow_frames = 0; + std::shared_ptr det_image; + std::vector result_vec, thread_result_vec; + detector.nms = 0.02; // comment it - if track_id is not required + std::atomic consumed, videowrite_ready; + bool exit_flag = false; + consumed = true; + videowrite_ready = true; + std::atomic fps_det_counter, fps_cap_counter; + fps_det_counter = 0; + fps_cap_counter = 0; + int current_det_fps = 0, current_cap_fps = 0; + std::thread t_detect, t_cap, t_videowrite; + std::mutex mtx; + std::condition_variable cv_detected, cv_pre_tracked; + std::chrono::steady_clock::time_point steady_start, steady_end; + cv::VideoCapture cap(filename); cap >> cur_frame; + int const video_fps = cap.get(CV_CAP_PROP_FPS); + cv::Size const frame_size = cur_frame.size(); + cv::VideoWriter output_video; + if (save_output_videofile) + output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true); - while (!cur_frame.empty()) - { - // always sync - if (t_cap.joinable()) { - t_cap.join(); - ++fps_cap_counter; - cur_frame = cap_frame.clone(); - } - t_cap = std::thread([&]() { cap >> cap_frame; }); - ++cur_time_extrapolate; + while (!cur_frame.empty()) + { + // always sync + if (t_cap.joinable()) { + t_cap.join(); + ++fps_cap_counter; + cur_frame = cap_frame.clone(); + } + t_cap = std::thread([&]() { cap >> cap_frame; }); + ++cur_time_extrapolate; - // swap result bouned-boxes and input-frame - if(consumed) - { - std::unique_lock lock(mtx); - det_image = detector.mat_to_image_resize(cur_frame); - auto old_result_vec = detector.tracking_id(result_vec); - auto detected_result_vec = thread_result_vec; - result_vec = detected_result_vec; + // swap result bouned-boxes and input-frame + if(consumed) + { + std::unique_lock lock(mtx); + det_image = detector.mat_to_image_resize(cur_frame); + auto old_result_vec = detector.tracking_id(result_vec); + auto detected_result_vec = thread_result_vec; + result_vec = detected_result_vec; #ifdef TRACK_OPTFLOW - // track optical flow - if (track_optflow_queue.size() > 0) { - //std::cout << "\n !!!! all = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl; - cv::Mat first_frame = track_optflow_queue.front(); - tracker_flow.update_tracking_flow(track_optflow_queue.front(), result_vec); + // track optical flow + if (track_optflow_queue.size() > 0) { + //std::cout << "\n !!!! all = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl; + cv::Mat first_frame = track_optflow_queue.front(); + tracker_flow.update_tracking_flow(track_optflow_queue.front(), result_vec); - while (track_optflow_queue.size() > 1) { - track_optflow_queue.pop(); - result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), true); - } - track_optflow_queue.pop(); - passed_flow_frames = 0; + while (track_optflow_queue.size() > 1) { + track_optflow_queue.pop(); + result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), true); + } + track_optflow_queue.pop(); + passed_flow_frames = 0; - result_vec = detector.tracking_id(result_vec); - auto tmp_result_vec = detector.tracking_id(detected_result_vec, false); - small_preview.set(first_frame, tmp_result_vec); + result_vec = detector.tracking_id(result_vec); + auto tmp_result_vec = detector.tracking_id(detected_result_vec, false); + small_preview.set(first_frame, tmp_result_vec); - extrapolate_coords.new_result(tmp_result_vec, old_time_extrapolate); - old_time_extrapolate = cur_time_extrapolate; - extrapolate_coords.update_result(result_vec, cur_time_extrapolate - 1); - } + extrapolate_coords.new_result(tmp_result_vec, old_time_extrapolate); + old_time_extrapolate = cur_time_extrapolate; + extrapolate_coords.update_result(result_vec, cur_time_extrapolate - 1); + } #else - result_vec = detector.tracking_id(result_vec); // comment it - if track_id is not required - extrapolate_coords.new_result(result_vec, cur_time_extrapolate - 1); + result_vec = detector.tracking_id(result_vec); // comment it - if track_id is not required + extrapolate_coords.new_result(result_vec, cur_time_extrapolate - 1); #endif - // add old tracked objects - for (auto &i : old_result_vec) { - auto it = std::find_if(result_vec.begin(), result_vec.end(), - [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); - bool track_id_absent = (it == result_vec.end()); - if (track_id_absent) { - if (i.frames_counter-- > 1) - result_vec.push_back(i); - } - else { - it->frames_counter = std::min((unsigned)3, i.frames_counter + 1); - } - } + // add old tracked objects + for (auto &i : old_result_vec) { + auto it = std::find_if(result_vec.begin(), result_vec.end(), + [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); + bool track_id_absent = (it == result_vec.end()); + if (track_id_absent) { + if (i.frames_counter-- > 1) + result_vec.push_back(i); + } + else { + it->frames_counter = std::min((unsigned)3, i.frames_counter + 1); + } + } #ifdef TRACK_OPTFLOW - tracker_flow.update_cur_bbox_vec(result_vec); - result_vec = tracker_flow.tracking_flow(cur_frame, true); // track optical flow + tracker_flow.update_cur_bbox_vec(result_vec); + result_vec = tracker_flow.tracking_flow(cur_frame, true); // track optical flow #endif - consumed = false; - cv_pre_tracked.notify_all(); - } - // launch thread once - Detection - if (!t_detect.joinable()) { - t_detect = std::thread([&]() { - auto current_image = det_image; - consumed = true; - while (current_image.use_count() > 0 && !exit_flag) { - auto result = detector.detect_resized(*current_image, frame_size.width, frame_size.height, - thresh, false); // true - ++fps_det_counter; - std::unique_lock lock(mtx); - thread_result_vec = result; - consumed = true; - cv_detected.notify_all(); - if (detector.wait_stream) { - while (consumed && !exit_flag) cv_pre_tracked.wait(lock); - } - current_image = det_image; - } - }); - } - //while (!consumed); // sync detection + consumed = false; + cv_pre_tracked.notify_all(); + } + // launch thread once - Detection + if (!t_detect.joinable()) { + t_detect = std::thread([&]() { + auto current_image = det_image; + consumed = true; + while (current_image.use_count() > 0 && !exit_flag) { + auto result = detector.detect_resized(*current_image, frame_size.width, frame_size.height, + thresh, false); // true + ++fps_det_counter; + std::unique_lock lock(mtx); + thread_result_vec = result; + consumed = true; + cv_detected.notify_all(); + if (detector.wait_stream) { + while (consumed && !exit_flag) cv_pre_tracked.wait(lock); + } + current_image = det_image; + } + }); + } + //while (!consumed); // sync detection - if (!cur_frame.empty()) { - steady_end = std::chrono::steady_clock::now(); - if (std::chrono::duration(steady_end - steady_start).count() >= 1) { - current_det_fps = fps_det_counter; - current_cap_fps = fps_cap_counter; - steady_start = steady_end; - fps_det_counter = 0; - fps_cap_counter = 0; - } + if (!cur_frame.empty()) { + steady_end = std::chrono::steady_clock::now(); + if (std::chrono::duration(steady_end - steady_start).count() >= 1) { + current_det_fps = fps_det_counter; + current_cap_fps = fps_cap_counter; + steady_start = steady_end; + fps_det_counter = 0; + fps_cap_counter = 0; + } - large_preview.set(cur_frame, result_vec); + large_preview.set(cur_frame, result_vec); #ifdef TRACK_OPTFLOW - ++passed_flow_frames; - track_optflow_queue.push(cur_frame.clone()); - result_vec = tracker_flow.tracking_flow(cur_frame); // track optical flow - extrapolate_coords.update_result(result_vec, cur_time_extrapolate); - small_preview.draw(cur_frame, show_small_boxes); -#endif - auto result_vec_draw = result_vec; - if (extrapolate_flag) { - result_vec_draw = extrapolate_coords.predict(cur_time_extrapolate); - cv::putText(cur_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); - } - draw_boxes(cur_frame, result_vec_draw, obj_names, current_det_fps, current_cap_fps); - //show_console_result(result_vec, obj_names); - large_preview.draw(cur_frame); + ++passed_flow_frames; + track_optflow_queue.push(cur_frame.clone()); + result_vec = tracker_flow.tracking_flow(cur_frame); // track optical flow + extrapolate_coords.update_result(result_vec, cur_time_extrapolate); + small_preview.draw(cur_frame, show_small_boxes); +#endif + auto result_vec_draw = result_vec; + if (extrapolate_flag) { + result_vec_draw = extrapolate_coords.predict(cur_time_extrapolate); + cv::putText(cur_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); + } + draw_boxes(cur_frame, result_vec_draw, obj_names, current_det_fps, current_cap_fps); + //show_console_result(result_vec, obj_names); + large_preview.draw(cur_frame); - cv::imshow("window name", cur_frame); - int key = cv::waitKey(3); // 3 or 16ms - if (key == 'f') show_small_boxes = !show_small_boxes; - if (key == 'p') while (true) if(cv::waitKey(100) == 'p') break; - if (key == 'e') extrapolate_flag = !extrapolate_flag; - if (key == 27) { exit_flag = true; break; } + cv::imshow("window name", cur_frame); + int key = cv::waitKey(3); // 3 or 16ms + if (key == 'f') show_small_boxes = !show_small_boxes; + if (key == 'p') while (true) if(cv::waitKey(100) == 'p') break; + if (key == 'e') extrapolate_flag = !extrapolate_flag; + if (key == 27) { exit_flag = true; break; } - if (output_video.isOpened() && videowrite_ready) { - if (t_videowrite.joinable()) t_videowrite.join(); - write_frame = cur_frame.clone(); - videowrite_ready = false; - t_videowrite = std::thread([&]() { - output_video << write_frame; videowrite_ready = true; - }); - } - } + if (output_video.isOpened() && videowrite_ready) { + if (t_videowrite.joinable()) t_videowrite.join(); + write_frame = cur_frame.clone(); + videowrite_ready = false; + t_videowrite = std::thread([&]() { + output_video << write_frame; videowrite_ready = true; + }); + } + } #ifndef TRACK_OPTFLOW - // wait detection result for video-file only (not for net-cam) - if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") { - std::unique_lock lock(mtx); - while (!consumed) cv_detected.wait(lock); - } + // wait detection result for video-file only (not for net-cam) + if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") { + std::unique_lock lock(mtx); + while (!consumed) cv_detected.wait(lock); + } #endif - } - exit_flag = true; - if (t_cap.joinable()) t_cap.join(); - if (t_detect.joinable()) t_detect.join(); - if (t_videowrite.joinable()) t_videowrite.join(); - std::cout << "Video ended \n"; - break; - } - else if (file_ext == "txt") { // list of image files - std::ifstream file(filename); - if (!file.is_open()) std::cout << "File not found! \n"; - else - for (std::string line; file >> line;) { - std::cout << line << std::endl; - cv::Mat mat_img = cv::imread(line); - std::vector result_vec = detector.detect(mat_img); - show_console_result(result_vec, obj_names); - //draw_boxes(mat_img, result_vec, obj_names); - //cv::imwrite("res_" + line, mat_img); - } - - } - else { // image file - cv::Mat mat_img = cv::imread(filename); - - auto start = std::chrono::steady_clock::now(); - std::vector result_vec = detector.detect(mat_img); - auto end = std::chrono::steady_clock::now(); - std::chrono::duration spent = end - start; - std::cout << " Time: " << spent.count() << " sec \n"; + } + exit_flag = true; + if (t_cap.joinable()) t_cap.join(); + if (t_detect.joinable()) t_detect.join(); + if (t_videowrite.joinable()) t_videowrite.join(); + std::cout << "Video ended \n"; + break; + } + else if (file_ext == "txt") { // list of image files + std::ifstream file(filename); + if (!file.is_open()) std::cout << "File not found! \n"; + else + for (std::string line; file >> line;) { + std::cout << line << std::endl; + cv::Mat mat_img = cv::imread(line); + std::vector result_vec = detector.detect(mat_img); + show_console_result(result_vec, obj_names); + //draw_boxes(mat_img, result_vec, obj_names); + //cv::imwrite("res_" + line, mat_img); + } + + } + else { // image file + cv::Mat mat_img = cv::imread(filename); + + auto start = std::chrono::steady_clock::now(); + std::vector result_vec = detector.detect(mat_img); + auto end = std::chrono::steady_clock::now(); + std::chrono::duration spent = end - start; + std::cout << " Time: " << spent.count() << " sec \n"; - //result_vec = detector.tracking_id(result_vec); // comment it - if track_id is not required - draw_boxes(mat_img, result_vec, obj_names); - cv::imshow("window name", mat_img); - show_console_result(result_vec, obj_names); - cv::waitKey(0); - } + //result_vec = detector.tracking_id(result_vec); // comment it - if track_id is not required + draw_boxes(mat_img, result_vec, obj_names); + cv::imshow("window name", mat_img); + show_console_result(result_vec, obj_names); + cv::waitKey(0); + } #else - //std::vector result_vec = detector.detect(filename); + //std::vector result_vec = detector.detect(filename); - auto img = detector.load_image(filename); - std::vector result_vec = detector.detect(img); - detector.free_image(img); - show_console_result(result_vec, obj_names); -#endif - } - catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); } - catch (...) { std::cerr << "unknown exception \n"; getchar(); } - filename.clear(); - } + auto img = detector.load_image(filename); + std::vector result_vec = detector.detect(img); + detector.free_image(img); + show_console_result(result_vec, obj_names); +#endif + } + catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); } + catch (...) { std::cerr << "unknown exception \n"; getchar(); } + filename.clear(); + } - return 0; + return 0; } \ No newline at end of file diff --git a/src/yolo_layer.c b/src/yolo_layer.c index f79bc418..f0bc0731 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -38,8 +38,8 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int l.bias_updates = calloc(n*2, sizeof(float)); l.outputs = h*w*n*(classes + 4 + 1); l.inputs = l.outputs; - l.max_boxes = max_boxes; - l.truths = l.max_boxes*(4 + 1); // 90*(4 + 1); + l.max_boxes = max_boxes; + l.truths = l.max_boxes*(4 + 1); // 90*(4 + 1); l.delta = calloc(batch*l.outputs, sizeof(float)); l.output = calloc(batch*l.outputs, sizeof(float)); for(i = 0; i < total*2; ++i){ @@ -117,33 +117,33 @@ void delta_yolo_class(float *output, float *delta, int index, int class_id, int if(avg_cat) *avg_cat += output[index + stride*class_id]; return; } - // Focal loss - if (focal_loss) { - // Focal Loss - float alpha = 0.5; // 0.25 or 0.5 - //float gamma = 2; // hardcoded in many places of the grad-formula + // Focal loss + if (focal_loss) { + // Focal Loss + float alpha = 0.5; // 0.25 or 0.5 + //float gamma = 2; // hardcoded in many places of the grad-formula - int ti = index + stride*class_id; - float pt = output[ti] + 0.000000000000001F; - // http://fooplot.com/#W3sidHlwZSI6MCwiZXEiOiItKDEteCkqKDIqeCpsb2coeCkreC0xKSIsImNvbG9yIjoiIzAwMDAwMCJ9LHsidHlwZSI6MTAwMH1d - float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1); // http://blog.csdn.net/linmingan/article/details/77885832 - //float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1); // https://github.com/unsky/focal-loss + int ti = index + stride*class_id; + float pt = output[ti] + 0.000000000000001F; + // http://fooplot.com/#W3sidHlwZSI6MCwiZXEiOiItKDEteCkqKDIqeCpsb2coeCkreC0xKSIsImNvbG9yIjoiIzAwMDAwMCJ9LHsidHlwZSI6MTAwMH1d + float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1); // http://blog.csdn.net/linmingan/article/details/77885832 + //float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1); // https://github.com/unsky/focal-loss - for (n = 0; n < classes; ++n) { - delta[index + stride*n] = (((n == class_id) ? 1 : 0) - output[index + stride*n]); + for (n = 0; n < classes; ++n) { + delta[index + stride*n] = (((n == class_id) ? 1 : 0) - output[index + stride*n]); - delta[index + stride*n] *= alpha*grad; + delta[index + stride*n] *= alpha*grad; - if (n == class_id) *avg_cat += output[index + stride*n]; - } - } - else { - // default - for (n = 0; n < classes; ++n) { - delta[index + stride*n] = ((n == class_id) ? 1 : 0) - output[index + stride*n]; - if (n == class_id && avg_cat) *avg_cat += output[index + stride*n]; - } - } + if (n == class_id) *avg_cat += output[index + stride*n]; + } + } + else { + // default + for (n = 0; n < classes; ++n) { + delta[index + stride*n] = ((n == class_id) ? 1 : 0) - output[index + stride*n]; + if (n == class_id && avg_cat) *avg_cat += output[index + stride*n]; + } + } } static int entry_index(layer l, int batch, int location, int entry) @@ -155,12 +155,12 @@ static int entry_index(layer l, int batch, int location, int entry) static box float_to_box_stride(float *f, int stride) { - box b = { 0 }; - b.x = f[0]; - b.y = f[1 * stride]; - b.w = f[2 * stride]; - b.h = f[3 * stride]; - return b; + box b = { 0 }; + b.x = f[0]; + b.y = f[1 * stride]; + b.w = f[2 * stride]; + b.h = f[3 * stride]; + return b; } void forward_yolo_layer(const layer l, network_state state) @@ -200,12 +200,12 @@ void forward_yolo_layer(const layer l, network_state state) int best_t = 0; for(t = 0; t < l.max_boxes; ++t){ box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (class_id >= l.classes) { - printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); - getchar(); - continue; // if label contains class_id more than number of classes in the cfg-file - } + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (class_id >= l.classes) { + printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); + getchar(); + continue; // if label contains class_id more than number of classes in the cfg-file + } if(!truth.x) break; float iou = box_iou(pred, truth); if (iou > best_iou) { @@ -234,8 +234,8 @@ void forward_yolo_layer(const layer l, network_state state) } for(t = 0; t < l.max_boxes; ++t){ box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (class_id >= l.classes) continue; // if label contains class_id more than number of classes in the cfg-file + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (class_id >= l.classes) continue; // if label contains class_id more than number of classes in the cfg-file if(!truth.x) break; float best_iou = 0; @@ -291,20 +291,20 @@ void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth int i; int new_w=0; int new_h=0; - if (letter) { - if (((float)netw / w) < ((float)neth / h)) { - new_w = netw; - new_h = (h * netw) / w; - } - else { - new_h = neth; - new_w = (w * neth) / h; - } - } - else { - new_w = netw; - new_h = neth; - } + if (letter) { + if (((float)netw / w) < ((float)neth / h)) { + new_w = netw; + new_h = (h * netw) / w; + } + else { + new_h = neth; + new_w = (w * neth) / h; + } + } + else { + new_w = netw; + new_h = neth; + } for (i = 0; i < n; ++i){ box b = dets[i].bbox; b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); @@ -411,25 +411,25 @@ void forward_yolo_layer_gpu(const layer l, network_state state) } //cuda_pull_array(l.output_gpu, state.input, l.batch*l.inputs); - float *in_cpu = calloc(l.batch*l.inputs, sizeof(float)); - cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs); - float *truth_cpu = 0; - if (state.truth) { - int num_truth = l.batch*l.truths; - truth_cpu = calloc(num_truth, sizeof(float)); - cuda_pull_array(state.truth, truth_cpu, num_truth); - } - network_state cpu_state = state; - cpu_state.net = state.net; - cpu_state.index = state.index; - cpu_state.train = state.train; - cpu_state.truth = truth_cpu; - cpu_state.input = in_cpu; - forward_yolo_layer(l, cpu_state); + float *in_cpu = calloc(l.batch*l.inputs, sizeof(float)); + cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs); + float *truth_cpu = 0; + if (state.truth) { + int num_truth = l.batch*l.truths; + truth_cpu = calloc(num_truth, sizeof(float)); + cuda_pull_array(state.truth, truth_cpu, num_truth); + } + network_state cpu_state = state; + cpu_state.net = state.net; + cpu_state.index = state.index; + cpu_state.train = state.train; + cpu_state.truth = truth_cpu; + cpu_state.input = in_cpu; + forward_yolo_layer(l, cpu_state); //forward_yolo_layer(l, state); cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); - free(in_cpu); - if (cpu_state.truth) free(cpu_state.truth); + free(in_cpu); + if (cpu_state.truth) free(cpu_state.truth); } void backward_yolo_layer_gpu(const layer l, network_state state) diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 4df9be5d..980db93a 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -50,310 +50,310 @@ int detect_mat(const uint8_t* data, const size_t data_length, bbox_t_container & return detection.size(); #else return -1; -#endif // OPENCV +#endif // OPENCV } int dispose() { - //if (detector != NULL) delete detector; - //detector = NULL; + //if (detector != NULL) delete detector; + //detector = NULL; detector.reset(); return 1; } #ifdef GPU void check_cuda(cudaError_t status) { - if (status != cudaSuccess) { - const char *s = cudaGetErrorString(status); - printf("CUDA Error Prev: %s\n", s); - } + if (status != cudaSuccess) { + const char *s = cudaGetErrorString(status); + printf("CUDA Error Prev: %s\n", s); + } } #endif struct detector_gpu_t { - network net; - image images[FRAMES]; - float *avg; - float *predictions[FRAMES]; - int demo_index; - unsigned int *track_id; + network net; + image images[FRAMES]; + float *avg; + float *predictions[FRAMES]; + int demo_index; + unsigned int *track_id; }; YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id) { - wait_stream = 0; - int old_gpu_index; + wait_stream = 0; + int old_gpu_index; #ifdef GPU - check_cuda( cudaGetDevice(&old_gpu_index) ); + check_cuda( cudaGetDevice(&old_gpu_index) ); #endif - detector_gpu_ptr = std::make_shared(); - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); + detector_gpu_ptr = std::make_shared(); + detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); #ifdef GPU - //check_cuda( cudaSetDevice(cur_gpu_id) ); - cuda_set_device(cur_gpu_id); - printf(" Used GPU %d \n", cur_gpu_id); + //check_cuda( cudaSetDevice(cur_gpu_id) ); + cuda_set_device(cur_gpu_id); + printf(" Used GPU %d \n", cur_gpu_id); #endif - network &net = detector_gpu.net; - net.gpu_index = cur_gpu_id; - //gpu_index = i; - - char *cfgfile = const_cast(cfg_filename.data()); - char *weightfile = const_cast(weight_filename.data()); + network &net = detector_gpu.net; + net.gpu_index = cur_gpu_id; + //gpu_index = i; + + char *cfgfile = const_cast(cfg_filename.data()); + char *weightfile = const_cast(weight_filename.data()); - net = parse_network_cfg_custom(cfgfile, 1); - if (weightfile) { - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - net.gpu_index = cur_gpu_id; - fuse_conv_batchnorm(net); + net = parse_network_cfg_custom(cfgfile, 1); + if (weightfile) { + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + net.gpu_index = cur_gpu_id; + fuse_conv_batchnorm(net); - layer l = net.layers[net.n - 1]; - int j; + layer l = net.layers[net.n - 1]; + int j; - detector_gpu.avg = (float *)calloc(l.outputs, sizeof(float)); - for (j = 0; j < FRAMES; ++j) detector_gpu.predictions[j] = (float *)calloc(l.outputs, sizeof(float)); - for (j = 0; j < FRAMES; ++j) detector_gpu.images[j] = make_image(1, 1, 3); + detector_gpu.avg = (float *)calloc(l.outputs, sizeof(float)); + for (j = 0; j < FRAMES; ++j) detector_gpu.predictions[j] = (float *)calloc(l.outputs, sizeof(float)); + for (j = 0; j < FRAMES; ++j) detector_gpu.images[j] = make_image(1, 1, 3); - detector_gpu.track_id = (unsigned int *)calloc(l.classes, sizeof(unsigned int)); - for (j = 0; j < l.classes; ++j) detector_gpu.track_id[j] = 1; + detector_gpu.track_id = (unsigned int *)calloc(l.classes, sizeof(unsigned int)); + for (j = 0; j < l.classes; ++j) detector_gpu.track_id[j] = 1; #ifdef GPU - check_cuda( cudaSetDevice(old_gpu_index) ); + check_cuda( cudaSetDevice(old_gpu_index) ); #endif } YOLODLL_API Detector::~Detector() { - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - layer l = detector_gpu.net.layers[detector_gpu.net.n - 1]; + detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); + layer l = detector_gpu.net.layers[detector_gpu.net.n - 1]; - free(detector_gpu.track_id); + free(detector_gpu.track_id); - free(detector_gpu.avg); - for (int j = 0; j < FRAMES; ++j) free(detector_gpu.predictions[j]); - for (int j = 0; j < FRAMES; ++j) if(detector_gpu.images[j].data) free(detector_gpu.images[j].data); + free(detector_gpu.avg); + for (int j = 0; j < FRAMES; ++j) free(detector_gpu.predictions[j]); + for (int j = 0; j < FRAMES; ++j) if(detector_gpu.images[j].data) free(detector_gpu.images[j].data); - int old_gpu_index; + int old_gpu_index; #ifdef GPU - cudaGetDevice(&old_gpu_index); - cuda_set_device(detector_gpu.net.gpu_index); + cudaGetDevice(&old_gpu_index); + cuda_set_device(detector_gpu.net.gpu_index); #endif - free_network(detector_gpu.net); + free_network(detector_gpu.net); #ifdef GPU - cudaSetDevice(old_gpu_index); + cudaSetDevice(old_gpu_index); #endif } YOLODLL_API int Detector::get_net_width() const { - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - return detector_gpu.net.w; + detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); + return detector_gpu.net.w; } YOLODLL_API int Detector::get_net_height() const { - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - return detector_gpu.net.h; + detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); + return detector_gpu.net.h; } YOLODLL_API int Detector::get_net_color_depth() const { - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - return detector_gpu.net.c; + detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); + return detector_gpu.net.c; } YOLODLL_API std::vector Detector::detect(std::string image_filename, float thresh, bool use_mean) { - std::shared_ptr image_ptr(new image_t, [](image_t *img) { if (img->data) free(img->data); delete img; }); - *image_ptr = load_image(image_filename); - return detect(*image_ptr, thresh, use_mean); + std::shared_ptr image_ptr(new image_t, [](image_t *img) { if (img->data) free(img->data); delete img; }); + *image_ptr = load_image(image_filename); + return detect(*image_ptr, thresh, use_mean); } static image load_image_stb(char *filename, int channels) { - int w, h, c; - unsigned char *data = stbi_load(filename, &w, &h, &c, channels); - if (!data) - throw std::runtime_error("file not found"); - if (channels) c = channels; - int i, j, k; - image im = make_image(w, h, c); - for (k = 0; k < c; ++k) { - for (j = 0; j < h; ++j) { - for (i = 0; i < w; ++i) { - int dst_index = i + w*j + w*h*k; - int src_index = k + c*i + c*w*j; - im.data[dst_index] = (float)data[src_index] / 255.; - } - } - } - free(data); - return im; + int w, h, c; + unsigned char *data = stbi_load(filename, &w, &h, &c, channels); + if (!data) + throw std::runtime_error("file not found"); + if (channels) c = channels; + int i, j, k; + image im = make_image(w, h, c); + for (k = 0; k < c; ++k) { + for (j = 0; j < h; ++j) { + for (i = 0; i < w; ++i) { + int dst_index = i + w*j + w*h*k; + int src_index = k + c*i + c*w*j; + im.data[dst_index] = (float)data[src_index] / 255.; + } + } + } + free(data); + return im; } YOLODLL_API image_t Detector::load_image(std::string image_filename) { - char *input = const_cast(image_filename.data()); - image im = load_image_stb(input, 3); + char *input = const_cast(image_filename.data()); + image im = load_image_stb(input, 3); - image_t img; - img.c = im.c; - img.data = im.data; - img.h = im.h; - img.w = im.w; + image_t img; + img.c = im.c; + img.data = im.data; + img.h = im.h; + img.w = im.w; - return img; + return img; } YOLODLL_API void Detector::free_image(image_t m) { - if (m.data) { - free(m.data); - } + if (m.data) { + free(m.data); + } } YOLODLL_API std::vector Detector::detect(image_t img, float thresh, bool use_mean) { - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - network &net = detector_gpu.net; - int old_gpu_index; + detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); + network &net = detector_gpu.net; + int old_gpu_index; #ifdef GPU - cudaGetDevice(&old_gpu_index); - if(cur_gpu_id != old_gpu_index) - cudaSetDevice(net.gpu_index); + cudaGetDevice(&old_gpu_index); + if(cur_gpu_id != old_gpu_index) + cudaSetDevice(net.gpu_index); - net.wait_stream = wait_stream; // 1 - wait CUDA-stream, 0 - not to wait + net.wait_stream = wait_stream; // 1 - wait CUDA-stream, 0 - not to wait #endif - //std::cout << "net.gpu_index = " << net.gpu_index << std::endl; + //std::cout << "net.gpu_index = " << net.gpu_index << std::endl; - //float nms = .4; + //float nms = .4; - image im; - im.c = img.c; - im.data = img.data; - im.h = img.h; - im.w = img.w; + image im; + im.c = img.c; + im.data = img.data; + im.h = img.h; + im.w = img.w; - image sized; - - if (net.w == im.w && net.h == im.h) { - sized = make_image(im.w, im.h, im.c); - memcpy(sized.data, im.data, im.w*im.h*im.c * sizeof(float)); - } - else - sized = resize_image(im, net.w, net.h); + image sized; + + if (net.w == im.w && net.h == im.h) { + sized = make_image(im.w, im.h, im.c); + memcpy(sized.data, im.data, im.w*im.h*im.c * sizeof(float)); + } + else + sized = resize_image(im, net.w, net.h); - layer l = net.layers[net.n - 1]; + layer l = net.layers[net.n - 1]; - float *X = sized.data; + float *X = sized.data; - float *prediction = network_predict(net, X); + float *prediction = network_predict(net, X); - if (use_mean) { - memcpy(detector_gpu.predictions[detector_gpu.demo_index], prediction, l.outputs * sizeof(float)); - mean_arrays(detector_gpu.predictions, FRAMES, l.outputs, detector_gpu.avg); - l.output = detector_gpu.avg; - detector_gpu.demo_index = (detector_gpu.demo_index + 1) % FRAMES; - } - //get_region_boxes(l, 1, 1, thresh, detector_gpu.probs, detector_gpu.boxes, 0, 0); - //if (nms) do_nms_sort(detector_gpu.boxes, detector_gpu.probs, l.w*l.h*l.n, l.classes, nms); + if (use_mean) { + memcpy(detector_gpu.predictions[detector_gpu.demo_index], prediction, l.outputs * sizeof(float)); + mean_arrays(detector_gpu.predictions, FRAMES, l.outputs, detector_gpu.avg); + l.output = detector_gpu.avg; + detector_gpu.demo_index = (detector_gpu.demo_index + 1) % FRAMES; + } + //get_region_boxes(l, 1, 1, thresh, detector_gpu.probs, detector_gpu.boxes, 0, 0); + //if (nms) do_nms_sort(detector_gpu.boxes, detector_gpu.probs, l.w*l.h*l.n, l.classes, nms); - int nboxes = 0; - int letterbox = 0; - float hier_thresh = 0.5; - detection *dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letterbox); - if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + int nboxes = 0; + int letterbox = 0; + float hier_thresh = 0.5; + detection *dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letterbox); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); - std::vector bbox_vec; + std::vector bbox_vec; - for (size_t i = 0; i < nboxes; ++i) { - box b = dets[i].bbox; - int const obj_id = max_index(dets[i].prob, l.classes); - float const prob = dets[i].prob[obj_id]; - - if (prob > thresh) - { - bbox_t bbox; - bbox.x = std::max((double)0, (b.x - b.w / 2.)*im.w); - bbox.y = std::max((double)0, (b.y - b.h / 2.)*im.h); - bbox.w = b.w*im.w; - bbox.h = b.h*im.h; - bbox.obj_id = obj_id; - bbox.prob = prob; - bbox.track_id = 0; + for (size_t i = 0; i < nboxes; ++i) { + box b = dets[i].bbox; + int const obj_id = max_index(dets[i].prob, l.classes); + float const prob = dets[i].prob[obj_id]; + + if (prob > thresh) + { + bbox_t bbox; + bbox.x = std::max((double)0, (b.x - b.w / 2.)*im.w); + bbox.y = std::max((double)0, (b.y - b.h / 2.)*im.h); + bbox.w = b.w*im.w; + bbox.h = b.h*im.h; + bbox.obj_id = obj_id; + bbox.prob = prob; + bbox.track_id = 0; - bbox_vec.push_back(bbox); - } - } + bbox_vec.push_back(bbox); + } + } - free_detections(dets, nboxes); - if(sized.data) - free(sized.data); + free_detections(dets, nboxes); + if(sized.data) + free(sized.data); #ifdef GPU - if (cur_gpu_id != old_gpu_index) - cudaSetDevice(old_gpu_index); + if (cur_gpu_id != old_gpu_index) + cudaSetDevice(old_gpu_index); #endif - return bbox_vec; + return bbox_vec; } YOLODLL_API std::vector Detector::tracking_id(std::vector cur_bbox_vec, bool const change_history, - int const frames_story, int const max_dist) + int const frames_story, int const max_dist) { - detector_gpu_t &det_gpu = *static_cast(detector_gpu_ptr.get()); + detector_gpu_t &det_gpu = *static_cast(detector_gpu_ptr.get()); - bool prev_track_id_present = false; - for (auto &i : prev_bbox_vec_deque) - if (i.size() > 0) prev_track_id_present = true; + bool prev_track_id_present = false; + for (auto &i : prev_bbox_vec_deque) + if (i.size() > 0) prev_track_id_present = true; - if (!prev_track_id_present) { - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) - cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++; - prev_bbox_vec_deque.push_front(cur_bbox_vec); - if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); - return cur_bbox_vec; - } + if (!prev_track_id_present) { + for (size_t i = 0; i < cur_bbox_vec.size(); ++i) + cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++; + prev_bbox_vec_deque.push_front(cur_bbox_vec); + if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); + return cur_bbox_vec; + } - std::vector dist_vec(cur_bbox_vec.size(), std::numeric_limits::max()); + std::vector dist_vec(cur_bbox_vec.size(), std::numeric_limits::max()); - for (auto &prev_bbox_vec : prev_bbox_vec_deque) { - for (auto &i : prev_bbox_vec) { - int cur_index = -1; - for (size_t m = 0; m < cur_bbox_vec.size(); ++m) { - bbox_t const& k = cur_bbox_vec[m]; - if (i.obj_id == k.obj_id) { - float center_x_diff = (float)(i.x + i.w/2) - (float)(k.x + k.w/2); - float center_y_diff = (float)(i.y + i.h/2) - (float)(k.y + k.h/2); - unsigned int cur_dist = sqrt(center_x_diff*center_x_diff + center_y_diff*center_y_diff); - if (cur_dist < max_dist && (k.track_id == 0 || dist_vec[m] > cur_dist)) { - dist_vec[m] = cur_dist; - cur_index = m; - } - } - } + for (auto &prev_bbox_vec : prev_bbox_vec_deque) { + for (auto &i : prev_bbox_vec) { + int cur_index = -1; + for (size_t m = 0; m < cur_bbox_vec.size(); ++m) { + bbox_t const& k = cur_bbox_vec[m]; + if (i.obj_id == k.obj_id) { + float center_x_diff = (float)(i.x + i.w/2) - (float)(k.x + k.w/2); + float center_y_diff = (float)(i.y + i.h/2) - (float)(k.y + k.h/2); + unsigned int cur_dist = sqrt(center_x_diff*center_x_diff + center_y_diff*center_y_diff); + if (cur_dist < max_dist && (k.track_id == 0 || dist_vec[m] > cur_dist)) { + dist_vec[m] = cur_dist; + cur_index = m; + } + } + } - bool track_id_absent = !std::any_of(cur_bbox_vec.begin(), cur_bbox_vec.end(), - [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); + bool track_id_absent = !std::any_of(cur_bbox_vec.begin(), cur_bbox_vec.end(), + [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); - if (cur_index >= 0 && track_id_absent){ - cur_bbox_vec[cur_index].track_id = i.track_id; - cur_bbox_vec[cur_index].w = (cur_bbox_vec[cur_index].w + i.w) / 2; - cur_bbox_vec[cur_index].h = (cur_bbox_vec[cur_index].h + i.h) / 2; - } - } - } + if (cur_index >= 0 && track_id_absent){ + cur_bbox_vec[cur_index].track_id = i.track_id; + cur_bbox_vec[cur_index].w = (cur_bbox_vec[cur_index].w + i.w) / 2; + cur_bbox_vec[cur_index].h = (cur_bbox_vec[cur_index].h + i.h) / 2; + } + } + } - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) - if (cur_bbox_vec[i].track_id == 0) - cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++; + for (size_t i = 0; i < cur_bbox_vec.size(); ++i) + if (cur_bbox_vec[i].track_id == 0) + cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++; - if (change_history) { - prev_bbox_vec_deque.push_front(cur_bbox_vec); - if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); - } + if (change_history) { + prev_bbox_vec_deque.push_front(cur_bbox_vec); + if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); + } - return cur_bbox_vec; + return cur_bbox_vec; } \ No newline at end of file diff --git a/src/yolo_v2_class.hpp b/src/yolo_v2_class.hpp index 8f7f6cab..8626d97f 100644 --- a/src/yolo_v2_class.hpp +++ b/src/yolo_v2_class.hpp @@ -14,18 +14,18 @@ #endif struct bbox_t { - unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box - float prob; // confidence - probability that the object was found correctly - unsigned int obj_id; // class of object - from range [0, classes-1] - unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) - unsigned int frames_counter;// counter of frames on which the object was detected + unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box + float prob; // confidence - probability that the object was found correctly + unsigned int obj_id; // class of object - from range [0, classes-1] + unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) + unsigned int frames_counter;// counter of frames on which the object was detected }; struct image_t { - int h; // height - int w; // width - int c; // number of chanels (3 - for RGB) - float *data; // pointer to the image data + int h; // height + int w; // width + int c; // number of chanels (3 - for RGB) + float *data; // pointer to the image data }; #define C_SHARP_MAX_OBJECTS 1000 @@ -40,10 +40,10 @@ struct bbox_t_container { #include #ifdef OPENCV -#include // C++ -#include "opencv2/highgui/highgui_c.h" // C -#include "opencv2/imgproc/imgproc_c.h" // C -#endif // OPENCV +#include // C++ +#include "opencv2/highgui/highgui_c.h" // C +#include "opencv2/imgproc/imgproc_c.h" // C +#endif // OPENCV extern "C" YOLODLL_API int init(const char *configurationFilename, const char *weightsFilename, int gpu); extern "C" YOLODLL_API int detect_image(const char *filename, bbox_t_container &container); @@ -51,106 +51,106 @@ extern "C" YOLODLL_API int detect_mat(const uint8_t* data, const size_t data_len extern "C" YOLODLL_API int dispose(); class Detector { - std::shared_ptr detector_gpu_ptr; - std::deque> prev_bbox_vec_deque; - const int cur_gpu_id; + std::shared_ptr detector_gpu_ptr; + std::deque> prev_bbox_vec_deque; + const int cur_gpu_id; public: - float nms = .4; - bool wait_stream; + float nms = .4; + bool wait_stream; - YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0); - YOLODLL_API ~Detector(); + YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0); + YOLODLL_API ~Detector(); - YOLODLL_API std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false); - YOLODLL_API std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false); - static YOLODLL_API image_t load_image(std::string image_filename); - static YOLODLL_API void free_image(image_t m); - YOLODLL_API int get_net_width() const; - YOLODLL_API int get_net_height() const; - YOLODLL_API int get_net_color_depth() const; + YOLODLL_API std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false); + YOLODLL_API std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false); + static YOLODLL_API image_t load_image(std::string image_filename); + static YOLODLL_API void free_image(image_t m); + YOLODLL_API int get_net_width() const; + YOLODLL_API int get_net_height() const; + YOLODLL_API int get_net_color_depth() const; - YOLODLL_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, - int const frames_story = 10, int const max_dist = 150); + YOLODLL_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, + int const frames_story = 10, int const max_dist = 150); - std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false) - { - if (img.data == NULL) - throw std::runtime_error("Image is empty"); - auto detection_boxes = detect(img, thresh, use_mean); - float wk = (float)init_w / img.w, hk = (float)init_h / img.h; - for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk; - return detection_boxes; - } + std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false) + { + if (img.data == NULL) + throw std::runtime_error("Image is empty"); + auto detection_boxes = detect(img, thresh, use_mean); + float wk = (float)init_w / img.w, hk = (float)init_h / img.h; + for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk; + return detection_boxes; + } #ifdef OPENCV - std::vector detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false) - { - if(mat.data == NULL) - throw std::runtime_error("Image is empty"); - auto image_ptr = mat_to_image_resize(mat); - return detect_resized(*image_ptr, mat.cols, mat.rows, thresh, use_mean); - } + std::vector detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false) + { + if(mat.data == NULL) + throw std::runtime_error("Image is empty"); + auto image_ptr = mat_to_image_resize(mat); + return detect_resized(*image_ptr, mat.cols, mat.rows, thresh, use_mean); + } - std::shared_ptr mat_to_image_resize(cv::Mat mat) const - { - if (mat.data == NULL) return std::shared_ptr(NULL); - cv::Mat det_mat; - cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height())); - return mat_to_image(det_mat); - } + std::shared_ptr mat_to_image_resize(cv::Mat mat) const + { + if (mat.data == NULL) return std::shared_ptr(NULL); + cv::Mat det_mat; + cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height())); + return mat_to_image(det_mat); + } - static std::shared_ptr mat_to_image(cv::Mat img_src) - { - cv::Mat img; - cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); - std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; }); - std::shared_ptr ipl_small = std::make_shared(img); - *image_ptr = ipl_to_image(ipl_small.get()); - return image_ptr; - } + static std::shared_ptr mat_to_image(cv::Mat img_src) + { + cv::Mat img; + cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); + std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; }); + std::shared_ptr ipl_small = std::make_shared(img); + *image_ptr = ipl_to_image(ipl_small.get()); + return image_ptr; + } private: - static image_t ipl_to_image(IplImage* src) - { - unsigned char *data = (unsigned char *)src->imageData; - int h = src->height; - int w = src->width; - int c = src->nChannels; - int step = src->widthStep; - image_t out = make_image_custom(w, h, c); - int count = 0; + static image_t ipl_to_image(IplImage* src) + { + unsigned char *data = (unsigned char *)src->imageData; + int h = src->height; + int w = src->width; + int c = src->nChannels; + int step = src->widthStep; + image_t out = make_image_custom(w, h, c); + int count = 0; - for (int k = 0; k < c; ++k) { - for (int i = 0; i < h; ++i) { - int i_step = i*step; - for (int j = 0; j < w; ++j) { - out.data[count++] = data[i_step + j*c + k] / 255.; - } - } - } + for (int k = 0; k < c; ++k) { + for (int i = 0; i < h; ++i) { + int i_step = i*step; + for (int j = 0; j < w; ++j) { + out.data[count++] = data[i_step + j*c + k] / 255.; + } + } + } - return out; - } + return out; + } - static image_t make_empty_image(int w, int h, int c) - { - image_t out; - out.data = 0; - out.h = h; - out.w = w; - out.c = c; - return out; - } + static image_t make_empty_image(int w, int h, int c) + { + image_t out; + out.data = 0; + out.h = h; + out.w = w; + out.c = c; + return out; + } - static image_t make_image_custom(int w, int h, int c) - { - image_t out = make_empty_image(w, h, c); - out.data = (float *)calloc(h*w*c, sizeof(float)); - return out; - } + static image_t make_image_custom(int w, int h, int c) + { + image_t out = make_empty_image(w, h, c); + out.data = (float *)calloc(h*w*c, sizeof(float)); + return out; + } -#endif // OPENCV +#endif // OPENCV }; @@ -165,170 +165,170 @@ private: class Tracker_optflow { public: - const int gpu_count; - const int gpu_id; - const int flow_error; + const int gpu_count; + const int gpu_id; + const int flow_error; - Tracker_optflow(int _gpu_id = 0, int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : - gpu_count(cv::cuda::getCudaEnabledDeviceCount()), gpu_id(std::min(_gpu_id, gpu_count-1)), - flow_error((_flow_error > 0)? _flow_error:(win_size*4)) - { - int const old_gpu_id = cv::cuda::getDevice(); - cv::cuda::setDevice(gpu_id); + Tracker_optflow(int _gpu_id = 0, int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : + gpu_count(cv::cuda::getCudaEnabledDeviceCount()), gpu_id(std::min(_gpu_id, gpu_count-1)), + flow_error((_flow_error > 0)? _flow_error:(win_size*4)) + { + int const old_gpu_id = cv::cuda::getDevice(); + cv::cuda::setDevice(gpu_id); - stream = cv::cuda::Stream(); + stream = cv::cuda::Stream(); - sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create(); - sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(win_size, win_size)); // 9, 15, 21, 31 - sync_PyrLKOpticalFlow_gpu->setMaxLevel(max_level); // +- 3 pt - sync_PyrLKOpticalFlow_gpu->setNumIters(iterations); // 2000, def: 30 + sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create(); + sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(win_size, win_size)); // 9, 15, 21, 31 + sync_PyrLKOpticalFlow_gpu->setMaxLevel(max_level); // +- 3 pt + sync_PyrLKOpticalFlow_gpu->setNumIters(iterations); // 2000, def: 30 - cv::cuda::setDevice(old_gpu_id); - } + cv::cuda::setDevice(old_gpu_id); + } - // just to avoid extra allocations - cv::cuda::GpuMat src_mat_gpu; - cv::cuda::GpuMat dst_mat_gpu, dst_grey_gpu; - cv::cuda::GpuMat prev_pts_flow_gpu, cur_pts_flow_gpu; - cv::cuda::GpuMat status_gpu, err_gpu; + // just to avoid extra allocations + cv::cuda::GpuMat src_mat_gpu; + cv::cuda::GpuMat dst_mat_gpu, dst_grey_gpu; + cv::cuda::GpuMat prev_pts_flow_gpu, cur_pts_flow_gpu; + cv::cuda::GpuMat status_gpu, err_gpu; - cv::cuda::GpuMat src_grey_gpu; // used in both functions - cv::Ptr sync_PyrLKOpticalFlow_gpu; - cv::cuda::Stream stream; + cv::cuda::GpuMat src_grey_gpu; // used in both functions + cv::Ptr sync_PyrLKOpticalFlow_gpu; + cv::cuda::Stream stream; - std::vector cur_bbox_vec; - std::vector good_bbox_vec_flags; - cv::Mat prev_pts_flow_cpu; + std::vector cur_bbox_vec; + std::vector good_bbox_vec_flags; + cv::Mat prev_pts_flow_cpu; - void update_cur_bbox_vec(std::vector _cur_bbox_vec) - { - cur_bbox_vec = _cur_bbox_vec; - good_bbox_vec_flags = std::vector(cur_bbox_vec.size(), true); - cv::Mat prev_pts, cur_pts_flow_cpu; + void update_cur_bbox_vec(std::vector _cur_bbox_vec) + { + cur_bbox_vec = _cur_bbox_vec; + good_bbox_vec_flags = std::vector(cur_bbox_vec.size(), true); + cv::Mat prev_pts, cur_pts_flow_cpu; - for (auto &i : cur_bbox_vec) { - float x_center = (i.x + i.w / 2.0F); - float y_center = (i.y + i.h / 2.0F); - prev_pts.push_back(cv::Point2f(x_center, y_center)); - } + for (auto &i : cur_bbox_vec) { + float x_center = (i.x + i.w / 2.0F); + float y_center = (i.y + i.h / 2.0F); + prev_pts.push_back(cv::Point2f(x_center, y_center)); + } - if (prev_pts.rows == 0) - prev_pts_flow_cpu = cv::Mat(); - else - cv::transpose(prev_pts, prev_pts_flow_cpu); + if (prev_pts.rows == 0) + prev_pts_flow_cpu = cv::Mat(); + else + cv::transpose(prev_pts, prev_pts_flow_cpu); - if (prev_pts_flow_gpu.cols < prev_pts_flow_cpu.cols) { - prev_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type()); - cur_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type()); + if (prev_pts_flow_gpu.cols < prev_pts_flow_cpu.cols) { + prev_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type()); + cur_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type()); - status_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_8UC1); - err_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_32FC1); - } + status_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_8UC1); + err_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_32FC1); + } - prev_pts_flow_gpu.upload(cv::Mat(prev_pts_flow_cpu), stream); - } + prev_pts_flow_gpu.upload(cv::Mat(prev_pts_flow_cpu), stream); + } - void update_tracking_flow(cv::Mat src_mat, std::vector _cur_bbox_vec) - { - int const old_gpu_id = cv::cuda::getDevice(); - if (old_gpu_id != gpu_id) - cv::cuda::setDevice(gpu_id); + void update_tracking_flow(cv::Mat src_mat, std::vector _cur_bbox_vec) + { + int const old_gpu_id = cv::cuda::getDevice(); + if (old_gpu_id != gpu_id) + cv::cuda::setDevice(gpu_id); - if (src_mat.channels() == 3) { - if (src_mat_gpu.cols == 0) { - src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type()); - src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1); - } + if (src_mat.channels() == 3) { + if (src_mat_gpu.cols == 0) { + src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type()); + src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1); + } - update_cur_bbox_vec(_cur_bbox_vec); + update_cur_bbox_vec(_cur_bbox_vec); - //src_grey_gpu.upload(src_mat, stream); // use BGR - src_mat_gpu.upload(src_mat, stream); - cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); - } - if (old_gpu_id != gpu_id) - cv::cuda::setDevice(old_gpu_id); - } + //src_grey_gpu.upload(src_mat, stream); // use BGR + src_mat_gpu.upload(src_mat, stream); + cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); + } + if (old_gpu_id != gpu_id) + cv::cuda::setDevice(old_gpu_id); + } - std::vector tracking_flow(cv::Mat dst_mat, bool check_error = true) - { - if (sync_PyrLKOpticalFlow_gpu.empty()) { - std::cout << "sync_PyrLKOpticalFlow_gpu isn't initialized \n"; - return cur_bbox_vec; - } + std::vector tracking_flow(cv::Mat dst_mat, bool check_error = true) + { + if (sync_PyrLKOpticalFlow_gpu.empty()) { + std::cout << "sync_PyrLKOpticalFlow_gpu isn't initialized \n"; + return cur_bbox_vec; + } - int const old_gpu_id = cv::cuda::getDevice(); - if(old_gpu_id != gpu_id) - cv::cuda::setDevice(gpu_id); + int const old_gpu_id = cv::cuda::getDevice(); + if(old_gpu_id != gpu_id) + cv::cuda::setDevice(gpu_id); - if (dst_mat_gpu.cols == 0) { - dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type()); - dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1); - } + if (dst_mat_gpu.cols == 0) { + dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type()); + dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1); + } - //dst_grey_gpu.upload(dst_mat, stream); // use BGR - dst_mat_gpu.upload(dst_mat, stream); - cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 1, stream); + //dst_grey_gpu.upload(dst_mat, stream); // use BGR + dst_mat_gpu.upload(dst_mat, stream); + cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 1, stream); - if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) { - stream.waitForCompletion(); - src_grey_gpu = dst_grey_gpu.clone(); - cv::cuda::setDevice(old_gpu_id); - return cur_bbox_vec; - } + if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) { + stream.waitForCompletion(); + src_grey_gpu = dst_grey_gpu.clone(); + cv::cuda::setDevice(old_gpu_id); + return cur_bbox_vec; + } - ////sync_PyrLKOpticalFlow_gpu.sparse(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, &err_gpu); // OpenCV 2.4.x - sync_PyrLKOpticalFlow_gpu->calc(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, err_gpu, stream); // OpenCV 3.x + ////sync_PyrLKOpticalFlow_gpu.sparse(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, &err_gpu); // OpenCV 2.4.x + sync_PyrLKOpticalFlow_gpu->calc(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, err_gpu, stream); // OpenCV 3.x - cv::Mat cur_pts_flow_cpu; - cur_pts_flow_gpu.download(cur_pts_flow_cpu, stream); + cv::Mat cur_pts_flow_cpu; + cur_pts_flow_gpu.download(cur_pts_flow_cpu, stream); - dst_grey_gpu.copyTo(src_grey_gpu, stream); + dst_grey_gpu.copyTo(src_grey_gpu, stream); - cv::Mat err_cpu, status_cpu; - err_gpu.download(err_cpu, stream); - status_gpu.download(status_cpu, stream); + cv::Mat err_cpu, status_cpu; + err_gpu.download(err_cpu, stream); + status_gpu.download(status_cpu, stream); - stream.waitForCompletion(); + stream.waitForCompletion(); - std::vector result_bbox_vec; + std::vector result_bbox_vec; - if (err_cpu.cols == cur_bbox_vec.size() && status_cpu.cols == cur_bbox_vec.size()) - { - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) - { - cv::Point2f cur_key_pt = cur_pts_flow_cpu.at(0, i); - cv::Point2f prev_key_pt = prev_pts_flow_cpu.at(0, i); + if (err_cpu.cols == cur_bbox_vec.size() && status_cpu.cols == cur_bbox_vec.size()) + { + for (size_t i = 0; i < cur_bbox_vec.size(); ++i) + { + cv::Point2f cur_key_pt = cur_pts_flow_cpu.at(0, i); + cv::Point2f prev_key_pt = prev_pts_flow_cpu.at(0, i); - float moved_x = cur_key_pt.x - prev_key_pt.x; - float moved_y = cur_key_pt.y - prev_key_pt.y; + float moved_x = cur_key_pt.x - prev_key_pt.x; + float moved_y = cur_key_pt.y - prev_key_pt.y; - if (abs(moved_x) < 100 && abs(moved_y) < 100 && good_bbox_vec_flags[i]) - if (err_cpu.at(0, i) < flow_error && status_cpu.at(0, i) != 0 && - ((float)cur_bbox_vec[i].x + moved_x) > 0 && ((float)cur_bbox_vec[i].y + moved_y) > 0) - { - cur_bbox_vec[i].x += moved_x + 0.5; - cur_bbox_vec[i].y += moved_y + 0.5; - result_bbox_vec.push_back(cur_bbox_vec[i]); - } - else good_bbox_vec_flags[i] = false; - else good_bbox_vec_flags[i] = false; + if (abs(moved_x) < 100 && abs(moved_y) < 100 && good_bbox_vec_flags[i]) + if (err_cpu.at(0, i) < flow_error && status_cpu.at(0, i) != 0 && + ((float)cur_bbox_vec[i].x + moved_x) > 0 && ((float)cur_bbox_vec[i].y + moved_y) > 0) + { + cur_bbox_vec[i].x += moved_x + 0.5; + cur_bbox_vec[i].y += moved_y + 0.5; + result_bbox_vec.push_back(cur_bbox_vec[i]); + } + else good_bbox_vec_flags[i] = false; + else good_bbox_vec_flags[i] = false; - //if(!check_error && !good_bbox_vec_flags[i]) result_bbox_vec.push_back(cur_bbox_vec[i]); - } - } + //if(!check_error && !good_bbox_vec_flags[i]) result_bbox_vec.push_back(cur_bbox_vec[i]); + } + } - cur_pts_flow_gpu.swap(prev_pts_flow_gpu); - cur_pts_flow_cpu.copyTo(prev_pts_flow_cpu); + cur_pts_flow_gpu.swap(prev_pts_flow_gpu); + cur_pts_flow_cpu.copyTo(prev_pts_flow_cpu); - if (old_gpu_id != gpu_id) - cv::cuda::setDevice(old_gpu_id); + if (old_gpu_id != gpu_id) + cv::cuda::setDevice(old_gpu_id); - return result_bbox_vec; - } + return result_bbox_vec; + } }; @@ -339,314 +339,314 @@ public: class Tracker_optflow { public: - const int flow_error; + const int flow_error; - Tracker_optflow(int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : - flow_error((_flow_error > 0)? _flow_error:(win_size*4)) - { - sync_PyrLKOpticalFlow = cv::SparsePyrLKOpticalFlow::create(); - sync_PyrLKOpticalFlow->setWinSize(cv::Size(win_size, win_size)); // 9, 15, 21, 31 - sync_PyrLKOpticalFlow->setMaxLevel(max_level); // +- 3 pt + Tracker_optflow(int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : + flow_error((_flow_error > 0)? _flow_error:(win_size*4)) + { + sync_PyrLKOpticalFlow = cv::SparsePyrLKOpticalFlow::create(); + sync_PyrLKOpticalFlow->setWinSize(cv::Size(win_size, win_size)); // 9, 15, 21, 31 + sync_PyrLKOpticalFlow->setMaxLevel(max_level); // +- 3 pt - } + } - // just to avoid extra allocations - cv::Mat dst_grey; - cv::Mat prev_pts_flow, cur_pts_flow; - cv::Mat status, err; + // just to avoid extra allocations + cv::Mat dst_grey; + cv::Mat prev_pts_flow, cur_pts_flow; + cv::Mat status, err; - cv::Mat src_grey; // used in both functions - cv::Ptr sync_PyrLKOpticalFlow; + cv::Mat src_grey; // used in both functions + cv::Ptr sync_PyrLKOpticalFlow; - std::vector cur_bbox_vec; - std::vector good_bbox_vec_flags; + std::vector cur_bbox_vec; + std::vector good_bbox_vec_flags; - void update_cur_bbox_vec(std::vector _cur_bbox_vec) - { - cur_bbox_vec = _cur_bbox_vec; - good_bbox_vec_flags = std::vector(cur_bbox_vec.size(), true); - cv::Mat prev_pts, cur_pts_flow; + void update_cur_bbox_vec(std::vector _cur_bbox_vec) + { + cur_bbox_vec = _cur_bbox_vec; + good_bbox_vec_flags = std::vector(cur_bbox_vec.size(), true); + cv::Mat prev_pts, cur_pts_flow; - for (auto &i : cur_bbox_vec) { - float x_center = (i.x + i.w / 2.0F); - float y_center = (i.y + i.h / 2.0F); - prev_pts.push_back(cv::Point2f(x_center, y_center)); - } + for (auto &i : cur_bbox_vec) { + float x_center = (i.x + i.w / 2.0F); + float y_center = (i.y + i.h / 2.0F); + prev_pts.push_back(cv::Point2f(x_center, y_center)); + } - if (prev_pts.rows == 0) - prev_pts_flow = cv::Mat(); - else - cv::transpose(prev_pts, prev_pts_flow); - } + if (prev_pts.rows == 0) + prev_pts_flow = cv::Mat(); + else + cv::transpose(prev_pts, prev_pts_flow); + } - void update_tracking_flow(cv::Mat new_src_mat, std::vector _cur_bbox_vec) - { - if (new_src_mat.channels() == 3) { + void update_tracking_flow(cv::Mat new_src_mat, std::vector _cur_bbox_vec) + { + if (new_src_mat.channels() == 3) { - update_cur_bbox_vec(_cur_bbox_vec); + update_cur_bbox_vec(_cur_bbox_vec); - cv::cvtColor(new_src_mat, src_grey, CV_BGR2GRAY, 1); - } - } + cv::cvtColor(new_src_mat, src_grey, CV_BGR2GRAY, 1); + } + } - std::vector tracking_flow(cv::Mat new_dst_mat, bool check_error = true) - { - if (sync_PyrLKOpticalFlow.empty()) { - std::cout << "sync_PyrLKOpticalFlow isn't initialized \n"; - return cur_bbox_vec; - } + std::vector tracking_flow(cv::Mat new_dst_mat, bool check_error = true) + { + if (sync_PyrLKOpticalFlow.empty()) { + std::cout << "sync_PyrLKOpticalFlow isn't initialized \n"; + return cur_bbox_vec; + } - cv::cvtColor(new_dst_mat, dst_grey, CV_BGR2GRAY, 1); + cv::cvtColor(new_dst_mat, dst_grey, CV_BGR2GRAY, 1); - if (src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols) { - src_grey = dst_grey.clone(); - return cur_bbox_vec; - } + if (src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols) { + src_grey = dst_grey.clone(); + return cur_bbox_vec; + } - if (prev_pts_flow.cols < 1) { - return cur_bbox_vec; - } + if (prev_pts_flow.cols < 1) { + return cur_bbox_vec; + } - ////sync_PyrLKOpticalFlow_gpu.sparse(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, &err_gpu); // OpenCV 2.4.x - sync_PyrLKOpticalFlow->calc(src_grey, dst_grey, prev_pts_flow, cur_pts_flow, status, err); // OpenCV 3.x + ////sync_PyrLKOpticalFlow_gpu.sparse(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, &err_gpu); // OpenCV 2.4.x + sync_PyrLKOpticalFlow->calc(src_grey, dst_grey, prev_pts_flow, cur_pts_flow, status, err); // OpenCV 3.x - dst_grey.copyTo(src_grey); + dst_grey.copyTo(src_grey); - std::vector result_bbox_vec; + std::vector result_bbox_vec; - if (err.rows == cur_bbox_vec.size() && status.rows == cur_bbox_vec.size()) - { - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) - { - cv::Point2f cur_key_pt = cur_pts_flow.at(0, i); - cv::Point2f prev_key_pt = prev_pts_flow.at(0, i); + if (err.rows == cur_bbox_vec.size() && status.rows == cur_bbox_vec.size()) + { + for (size_t i = 0; i < cur_bbox_vec.size(); ++i) + { + cv::Point2f cur_key_pt = cur_pts_flow.at(0, i); + cv::Point2f prev_key_pt = prev_pts_flow.at(0, i); - float moved_x = cur_key_pt.x - prev_key_pt.x; - float moved_y = cur_key_pt.y - prev_key_pt.y; + float moved_x = cur_key_pt.x - prev_key_pt.x; + float moved_y = cur_key_pt.y - prev_key_pt.y; - if (abs(moved_x) < 100 && abs(moved_y) < 100 && good_bbox_vec_flags[i]) - if (err.at(0, i) < flow_error && status.at(0, i) != 0 && - ((float)cur_bbox_vec[i].x + moved_x) > 0 && ((float)cur_bbox_vec[i].y + moved_y) > 0) - { - cur_bbox_vec[i].x += moved_x + 0.5; - cur_bbox_vec[i].y += moved_y + 0.5; - result_bbox_vec.push_back(cur_bbox_vec[i]); - } - else good_bbox_vec_flags[i] = false; - else good_bbox_vec_flags[i] = false; + if (abs(moved_x) < 100 && abs(moved_y) < 100 && good_bbox_vec_flags[i]) + if (err.at(0, i) < flow_error && status.at(0, i) != 0 && + ((float)cur_bbox_vec[i].x + moved_x) > 0 && ((float)cur_bbox_vec[i].y + moved_y) > 0) + { + cur_bbox_vec[i].x += moved_x + 0.5; + cur_bbox_vec[i].y += moved_y + 0.5; + result_bbox_vec.push_back(cur_bbox_vec[i]); + } + else good_bbox_vec_flags[i] = false; + else good_bbox_vec_flags[i] = false; - //if(!check_error && !good_bbox_vec_flags[i]) result_bbox_vec.push_back(cur_bbox_vec[i]); - } - } + //if(!check_error && !good_bbox_vec_flags[i]) result_bbox_vec.push_back(cur_bbox_vec[i]); + } + } - prev_pts_flow = cur_pts_flow.clone(); + prev_pts_flow = cur_pts_flow.clone(); - return result_bbox_vec; - } + return result_bbox_vec; + } }; #else class Tracker_optflow {}; -#endif // defined(TRACK_OPTFLOW) && defined(OPENCV) +#endif // defined(TRACK_OPTFLOW) && defined(OPENCV) #ifdef OPENCV static cv::Scalar obj_id_to_color(int obj_id) { - int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } }; - int const offset = obj_id * 123457 % 6; - int const color_scale = 150 + (obj_id * 123457) % 100; - cv::Scalar color(colors[offset][0], colors[offset][1], colors[offset][2]); - color *= color_scale; - return color; + int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } }; + int const offset = obj_id * 123457 % 6; + int const color_scale = 150 + (obj_id * 123457) % 100; + cv::Scalar color(colors[offset][0], colors[offset][1], colors[offset][2]); + color *= color_scale; + return color; } class preview_boxes_t { - enum { frames_history = 30 }; // how long to keep the history saved + enum { frames_history = 30 }; // how long to keep the history saved - struct preview_box_track_t { - unsigned int track_id, obj_id, last_showed_frames_ago; - bool current_detection; - bbox_t bbox; - cv::Mat mat_obj, mat_resized_obj; - preview_box_track_t() : track_id(0), obj_id(0), last_showed_frames_ago(frames_history), current_detection(false) {} - }; - std::vector preview_box_track_id; - size_t const preview_box_size, bottom_offset; - bool const one_off_detections; + struct preview_box_track_t { + unsigned int track_id, obj_id, last_showed_frames_ago; + bool current_detection; + bbox_t bbox; + cv::Mat mat_obj, mat_resized_obj; + preview_box_track_t() : track_id(0), obj_id(0), last_showed_frames_ago(frames_history), current_detection(false) {} + }; + std::vector preview_box_track_id; + size_t const preview_box_size, bottom_offset; + bool const one_off_detections; public: - preview_boxes_t(size_t _preview_box_size = 100, size_t _bottom_offset = 100, bool _one_off_detections = false) : - preview_box_size(_preview_box_size), bottom_offset(_bottom_offset), one_off_detections(_one_off_detections) - {} + preview_boxes_t(size_t _preview_box_size = 100, size_t _bottom_offset = 100, bool _one_off_detections = false) : + preview_box_size(_preview_box_size), bottom_offset(_bottom_offset), one_off_detections(_one_off_detections) + {} - void set(cv::Mat src_mat, std::vector result_vec) - { - size_t const count_preview_boxes = src_mat.cols / preview_box_size; - if (preview_box_track_id.size() != count_preview_boxes) preview_box_track_id.resize(count_preview_boxes); + void set(cv::Mat src_mat, std::vector result_vec) + { + size_t const count_preview_boxes = src_mat.cols / preview_box_size; + if (preview_box_track_id.size() != count_preview_boxes) preview_box_track_id.resize(count_preview_boxes); - // increment frames history - for (auto &i : preview_box_track_id) - i.last_showed_frames_ago = std::min((unsigned)frames_history, i.last_showed_frames_ago + 1); + // increment frames history + for (auto &i : preview_box_track_id) + i.last_showed_frames_ago = std::min((unsigned)frames_history, i.last_showed_frames_ago + 1); - // occupy empty boxes - for (auto &k : result_vec) { - bool found = false; - // find the same (track_id) - for (auto &i : preview_box_track_id) { - if (i.track_id == k.track_id) { - if (!one_off_detections) i.last_showed_frames_ago = 0; // for tracked objects - found = true; - break; - } - } - if (!found) { - // find empty box - for (auto &i : preview_box_track_id) { - if (i.last_showed_frames_ago == frames_history) { - if (!one_off_detections && k.frames_counter == 0) break; // don't show if obj isn't tracked yet - i.track_id = k.track_id; - i.obj_id = k.obj_id; - i.bbox = k; - i.last_showed_frames_ago = 0; - break; - } - } - } - } + // occupy empty boxes + for (auto &k : result_vec) { + bool found = false; + // find the same (track_id) + for (auto &i : preview_box_track_id) { + if (i.track_id == k.track_id) { + if (!one_off_detections) i.last_showed_frames_ago = 0; // for tracked objects + found = true; + break; + } + } + if (!found) { + // find empty box + for (auto &i : preview_box_track_id) { + if (i.last_showed_frames_ago == frames_history) { + if (!one_off_detections && k.frames_counter == 0) break; // don't show if obj isn't tracked yet + i.track_id = k.track_id; + i.obj_id = k.obj_id; + i.bbox = k; + i.last_showed_frames_ago = 0; + break; + } + } + } + } - // draw preview box (from old or current frame) - for (size_t i = 0; i < preview_box_track_id.size(); ++i) - { - // get object image - cv::Mat dst = preview_box_track_id[i].mat_resized_obj; - preview_box_track_id[i].current_detection = false; + // draw preview box (from old or current frame) + for (size_t i = 0; i < preview_box_track_id.size(); ++i) + { + // get object image + cv::Mat dst = preview_box_track_id[i].mat_resized_obj; + preview_box_track_id[i].current_detection = false; - for (auto &k : result_vec) { - if (preview_box_track_id[i].track_id == k.track_id) { - if (one_off_detections && preview_box_track_id[i].last_showed_frames_ago > 0) { - preview_box_track_id[i].last_showed_frames_ago = frames_history; break; - } - bbox_t b = k; - cv::Rect r(b.x, b.y, b.w, b.h); - cv::Rect img_rect(cv::Point2i(0, 0), src_mat.size()); - cv::Rect rect_roi = r & img_rect; - if (rect_roi.width > 1 || rect_roi.height > 1) { - cv::Mat roi = src_mat(rect_roi); - cv::resize(roi, dst, cv::Size(preview_box_size, preview_box_size), cv::INTER_NEAREST); - preview_box_track_id[i].mat_obj = roi.clone(); - preview_box_track_id[i].mat_resized_obj = dst.clone(); - preview_box_track_id[i].current_detection = true; - preview_box_track_id[i].bbox = k; - } - break; - } - } - } - } + for (auto &k : result_vec) { + if (preview_box_track_id[i].track_id == k.track_id) { + if (one_off_detections && preview_box_track_id[i].last_showed_frames_ago > 0) { + preview_box_track_id[i].last_showed_frames_ago = frames_history; break; + } + bbox_t b = k; + cv::Rect r(b.x, b.y, b.w, b.h); + cv::Rect img_rect(cv::Point2i(0, 0), src_mat.size()); + cv::Rect rect_roi = r & img_rect; + if (rect_roi.width > 1 || rect_roi.height > 1) { + cv::Mat roi = src_mat(rect_roi); + cv::resize(roi, dst, cv::Size(preview_box_size, preview_box_size), cv::INTER_NEAREST); + preview_box_track_id[i].mat_obj = roi.clone(); + preview_box_track_id[i].mat_resized_obj = dst.clone(); + preview_box_track_id[i].current_detection = true; + preview_box_track_id[i].bbox = k; + } + break; + } + } + } + } - void draw(cv::Mat draw_mat, bool show_small_boxes = false) - { - // draw preview box (from old or current frame) - for (size_t i = 0; i < preview_box_track_id.size(); ++i) - { - auto &prev_box = preview_box_track_id[i]; + void draw(cv::Mat draw_mat, bool show_small_boxes = false) + { + // draw preview box (from old or current frame) + for (size_t i = 0; i < preview_box_track_id.size(); ++i) + { + auto &prev_box = preview_box_track_id[i]; - // draw object image - cv::Mat dst = prev_box.mat_resized_obj; - if (prev_box.last_showed_frames_ago < frames_history && - dst.size() == cv::Size(preview_box_size, preview_box_size)) - { - cv::Rect dst_rect_roi(cv::Point2i(i * preview_box_size, draw_mat.rows - bottom_offset), dst.size()); - cv::Mat dst_roi = draw_mat(dst_rect_roi); - dst.copyTo(dst_roi); + // draw object image + cv::Mat dst = prev_box.mat_resized_obj; + if (prev_box.last_showed_frames_ago < frames_history && + dst.size() == cv::Size(preview_box_size, preview_box_size)) + { + cv::Rect dst_rect_roi(cv::Point2i(i * preview_box_size, draw_mat.rows - bottom_offset), dst.size()); + cv::Mat dst_roi = draw_mat(dst_rect_roi); + dst.copyTo(dst_roi); - cv::Scalar color = obj_id_to_color(prev_box.obj_id); - int thickness = (prev_box.current_detection) ? 5 : 1; - cv::rectangle(draw_mat, dst_rect_roi, color, thickness); + cv::Scalar color = obj_id_to_color(prev_box.obj_id); + int thickness = (prev_box.current_detection) ? 5 : 1; + cv::rectangle(draw_mat, dst_rect_roi, color, thickness); - unsigned int const track_id = prev_box.track_id; - std::string track_id_str = (track_id > 0) ? std::to_string(track_id) : ""; - putText(draw_mat, track_id_str, dst_rect_roi.tl() - cv::Point2i(-4, 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.9, cv::Scalar(0, 0, 0), 2); + unsigned int const track_id = prev_box.track_id; + std::string track_id_str = (track_id > 0) ? std::to_string(track_id) : ""; + putText(draw_mat, track_id_str, dst_rect_roi.tl() - cv::Point2i(-4, 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.9, cv::Scalar(0, 0, 0), 2); - std::string size_str = std::to_string(prev_box.bbox.w) + "x" + std::to_string(prev_box.bbox.h); - putText(draw_mat, size_str, dst_rect_roi.tl() + cv::Point2i(0, 12), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); + std::string size_str = std::to_string(prev_box.bbox.w) + "x" + std::to_string(prev_box.bbox.h); + putText(draw_mat, size_str, dst_rect_roi.tl() + cv::Point2i(0, 12), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); - if (!one_off_detections && prev_box.current_detection) { - cv::line(draw_mat, dst_rect_roi.tl() + cv::Point2i(preview_box_size, 0), - cv::Point2i(prev_box.bbox.x, prev_box.bbox.y + prev_box.bbox.h), - color); - } + if (!one_off_detections && prev_box.current_detection) { + cv::line(draw_mat, dst_rect_roi.tl() + cv::Point2i(preview_box_size, 0), + cv::Point2i(prev_box.bbox.x, prev_box.bbox.y + prev_box.bbox.h), + color); + } - if (one_off_detections && show_small_boxes) { - cv::Rect src_rect_roi(cv::Point2i(prev_box.bbox.x, prev_box.bbox.y), - cv::Size(prev_box.bbox.w, prev_box.bbox.h)); - unsigned int const color_history = (255 * prev_box.last_showed_frames_ago) / frames_history; - color = cv::Scalar(255 - 3 * color_history, 255 - 2 * color_history, 255 - 1 * color_history); - if (prev_box.mat_obj.size() == src_rect_roi.size()) { - prev_box.mat_obj.copyTo(draw_mat(src_rect_roi)); - } - cv::rectangle(draw_mat, src_rect_roi, color, thickness); - putText(draw_mat, track_id_str, src_rect_roi.tl() - cv::Point2i(0, 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); - } - } - } - } + if (one_off_detections && show_small_boxes) { + cv::Rect src_rect_roi(cv::Point2i(prev_box.bbox.x, prev_box.bbox.y), + cv::Size(prev_box.bbox.w, prev_box.bbox.h)); + unsigned int const color_history = (255 * prev_box.last_showed_frames_ago) / frames_history; + color = cv::Scalar(255 - 3 * color_history, 255 - 2 * color_history, 255 - 1 * color_history); + if (prev_box.mat_obj.size() == src_rect_roi.size()) { + prev_box.mat_obj.copyTo(draw_mat(src_rect_roi)); + } + cv::rectangle(draw_mat, src_rect_roi, color, thickness); + putText(draw_mat, track_id_str, src_rect_roi.tl() - cv::Point2i(0, 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); + } + } + } + } }; -#endif // OPENCV +#endif // OPENCV //extern "C" { -#endif // __cplusplus +#endif // __cplusplus /* - // C - wrappers - YOLODLL_API void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id); - YOLODLL_API void delete_detector(); - YOLODLL_API bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size); - YOLODLL_API bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size); - YOLODLL_API bbox_t* detect(image_t img, int *result_size); - YOLODLL_API image_t load_img(char *image_filename); - YOLODLL_API void free_img(image_t m); + // C - wrappers + YOLODLL_API void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id); + YOLODLL_API void delete_detector(); + YOLODLL_API bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size); + YOLODLL_API bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size); + YOLODLL_API bbox_t* detect(image_t img, int *result_size); + YOLODLL_API image_t load_img(char *image_filename); + YOLODLL_API void free_img(image_t m); #ifdef __cplusplus -} // extern "C" +} // extern "C" static std::shared_ptr c_detector_ptr; static std::vector c_result_vec; void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id) { - c_detector_ptr = std::make_shared(cfg_filename, weight_filename, gpu_id); + c_detector_ptr = std::make_shared(cfg_filename, weight_filename, gpu_id); } void delete_detector() { c_detector_ptr.reset(); } bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size) { - c_result_vec = static_cast(c_detector_ptr.get())->detect(img, thresh, use_mean); - *result_size = c_result_vec.size(); - return c_result_vec.data(); + c_result_vec = static_cast(c_detector_ptr.get())->detect(img, thresh, use_mean); + *result_size = c_result_vec.size(); + return c_result_vec.data(); } bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size) { - c_result_vec = static_cast(c_detector_ptr.get())->detect_resized(img, init_w, init_h, thresh, use_mean); - *result_size = c_result_vec.size(); - return c_result_vec.data(); + c_result_vec = static_cast(c_detector_ptr.get())->detect_resized(img, init_w, init_h, thresh, use_mean); + *result_size = c_result_vec.size(); + return c_result_vec.data(); } bbox_t* detect(image_t img, int *result_size) { - return detect_custom(img, 0.24, true, result_size); + return detect_custom(img, 0.24, true, result_size); } image_t load_img(char *image_filename) { - return static_cast(c_detector_ptr.get())->load_image(image_filename); + return static_cast(c_detector_ptr.get())->load_image(image_filename); } void free_img(image_t m) { - static_cast(c_detector_ptr.get())->free_image(m); + static_cast(c_detector_ptr.get())->free_image(m); } -#endif // __cplusplus +#endif // __cplusplus */