mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Fix OpenCV compilation. maxpool_layer on GPU uses cuDNN.
This commit is contained in:
@ -370,6 +370,8 @@ int main(int argc, char **argv)
|
|||||||
gpu_index = find_int_arg(argc, argv, "-i", 0);
|
gpu_index = find_int_arg(argc, argv, "-i", 0);
|
||||||
if(find_arg(argc, argv, "-nogpu")) {
|
if(find_arg(argc, argv, "-nogpu")) {
|
||||||
gpu_index = -1;
|
gpu_index = -1;
|
||||||
|
printf("\n Currently Darknet doesn't support -nogpu flag. If you want to use CPU - please compile Darknet with GPU=0 in the Makefile, or compile darknet_no_gpu.sln on Windows.\n");
|
||||||
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef GPU
|
#ifndef GPU
|
||||||
|
@ -336,8 +336,8 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
|
|||||||
// if truth (box for object) is smaller than 1x1 pix
|
// if truth (box for object) is smaller than 1x1 pix
|
||||||
char buff[256];
|
char buff[256];
|
||||||
if (id >= classes) {
|
if (id >= classes) {
|
||||||
printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, classes);
|
printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, (classes-1));
|
||||||
sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, classes);
|
sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, (classes-1));
|
||||||
system(buff);
|
system(buff);
|
||||||
getchar();
|
getchar();
|
||||||
++sub;
|
++sub;
|
||||||
|
@ -21,6 +21,8 @@
|
|||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
#endif
|
#endif
|
||||||
#include "http_stream.h"
|
#include "http_stream.h"
|
||||||
|
|
||||||
|
#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 )
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern int check_mistakes;
|
extern int check_mistakes;
|
||||||
|
@ -307,6 +307,7 @@ struct layer{
|
|||||||
cudnnConvolutionFwdAlgo_t fw_algo;
|
cudnnConvolutionFwdAlgo_t fw_algo;
|
||||||
cudnnConvolutionBwdDataAlgo_t bd_algo;
|
cudnnConvolutionBwdDataAlgo_t bd_algo;
|
||||||
cudnnConvolutionBwdFilterAlgo_t bf_algo;
|
cudnnConvolutionBwdFilterAlgo_t bf_algo;
|
||||||
|
cudnnPoolingDescriptor_t poolingDesc;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
@ -47,7 +47,27 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
|
|||||||
l.indexes_gpu = cuda_make_int_array(output_size);
|
l.indexes_gpu = cuda_make_int_array(output_size);
|
||||||
l.output_gpu = cuda_make_array(l.output, output_size);
|
l.output_gpu = cuda_make_array(l.output, output_size);
|
||||||
l.delta_gpu = cuda_make_array(l.delta, output_size);
|
l.delta_gpu = cuda_make_array(l.delta, output_size);
|
||||||
#endif
|
#ifdef CUDNN
|
||||||
|
cudnnStatus_t maxpool_status;
|
||||||
|
maxpool_status = cudnnCreatePoolingDescriptor(&l.poolingDesc);
|
||||||
|
|
||||||
|
maxpool_status = cudnnSetPooling2dDescriptor(
|
||||||
|
l.poolingDesc,
|
||||||
|
CUDNN_POOLING_MAX,
|
||||||
|
CUDNN_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN
|
||||||
|
l.size,
|
||||||
|
l.size,
|
||||||
|
0, //l.pad,
|
||||||
|
0, //l.pad,
|
||||||
|
l.stride,
|
||||||
|
l.stride);
|
||||||
|
|
||||||
|
cudnnCreateTensorDescriptor(&l.srcTensorDesc);
|
||||||
|
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||||
|
cudnnSetTensor4dDescriptor(l.srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.c, l.h, l.w);
|
||||||
|
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||||
|
#endif // CUDNN
|
||||||
|
#endif // GPU
|
||||||
l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
|
l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
|
||||||
fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
|
fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
|
||||||
return l;
|
return l;
|
||||||
|
@ -86,6 +86,33 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
|
|||||||
|
|
||||||
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#ifdef CUDNN
|
||||||
|
if (!state.train) {// && layer.stride == layer.size) {
|
||||||
|
// cudnnPoolingBackward
|
||||||
|
cudnnStatus_t maxpool_status;
|
||||||
|
|
||||||
|
float alpha = 1, beta = 0;
|
||||||
|
maxpool_status = cudnnPoolingForward(
|
||||||
|
cudnn_handle(),
|
||||||
|
layer.poolingDesc,
|
||||||
|
&alpha,
|
||||||
|
layer.srcTensorDesc,
|
||||||
|
state.input,
|
||||||
|
&beta,
|
||||||
|
layer.dstTensorDesc,
|
||||||
|
layer.output_gpu);
|
||||||
|
|
||||||
|
//maxpool_status = cudnnDestroyPoolingDescriptor(poolingDesc);
|
||||||
|
//cudnnDestroyTensorDescriptor(layer.srcTensorDesc);
|
||||||
|
//cudnnDestroyTensorDescriptor(layer.dstTensorDesc);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int h = layer.out_h;
|
int h = layer.out_h;
|
||||||
int w = layer.out_w;
|
int w = layer.out_w;
|
||||||
int c = layer.c;
|
int c = layer.c;
|
||||||
|
Reference in New Issue
Block a user