If there is excessive GPU-RAM consumption by CUDNN, then do not use the workspace
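Note: the diff below compares the new cudnn_preference argument against two constants, cudnn_fastest and cudnn_smallest, that are not defined in the hunks shown here; presumably the commit introduces them elsewhere (for example in the convolutional layer header). A minimal assumed sketch of such a definition, for reading the diff only:

/* Assumed definition, not shown in this diff: two preference levels for
 * cudnn_convolutional_setup(). The names appear in the commit; the exact
 * values and location are a guess. */
enum { cudnn_fastest, cudnn_smallest };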
src/convolutional_layer.c

@@ -137,7 +137,7 @@ size_t get_workspace_size(layer l){

 #ifdef GPU
 #ifdef CUDNN
-void cudnn_convolutional_setup(layer *l)
+void cudnn_convolutional_setup(layer *l, int cudnn_preference)
 {
     cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
     cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
@@ -151,12 +151,21 @@ void cudnn_convolutional_setup(layer *l)
 #else
     cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1
 #endif
+    int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
+    int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST;
+    int backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST;
+    if (cudnn_preference == cudnn_smallest) {
+        forward_algo = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
+        backward_algo = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
+        backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE;
+    }
+
     cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
             l->srcTensorDesc,
             l->weightDesc,
             l->convDesc,
             l->dstTensorDesc,
-            CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
+            forward_algo,
             0,
             &l->fw_algo);
     cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
@@ -164,7 +173,7 @@ void cudnn_convolutional_setup(layer *l)
             l->ddstTensorDesc,
             l->convDesc,
             l->dsrcTensorDesc,
-            CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
+            backward_algo,
             0,
             &l->bd_algo);
     cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
@@ -172,7 +181,7 @@ void cudnn_convolutional_setup(layer *l)
             l->ddstTensorDesc,
             l->convDesc,
             l->dweightDesc,
-            CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
+            backward_filter,
             0,
             &l->bf_algo);
 }
@@ -306,7 +315,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
         cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
         cudnnCreateFilterDescriptor(&l.dweightDesc);
         cudnnCreateConvolutionDescriptor(&l.convDesc);
-        cudnn_convolutional_setup(&l);
+        cudnn_convolutional_setup(&l, cudnn_fastest);
 #endif
     }
 #endif
@@ -396,10 +405,22 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
         }
     }
 #ifdef CUDNN
-    cudnn_convolutional_setup(l);
+    cudnn_convolutional_setup(l, cudnn_fastest);
 #endif
 #endif
     l->workspace_size = get_workspace_size(*l);
+
+#ifdef CUDNN
+    // check for excessive memory consumption
+    size_t free_byte;
+    size_t total_byte;
+    check_error(cudaMemGetInfo(&free_byte, &total_byte));
+    if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
+        printf(" used slow CUDNN algo without Workspace! \n");
+        cudnn_convolutional_setup(l, cudnn_smallest);
+        l->workspace_size = get_workspace_size(*l);
+    }
+#endif
 }

 void add_bias(float *output, float *biases, int batch, int n, int size)
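Standalone, the pattern the commit introduces looks roughly like the sketch below: ask cuDNN for its fastest forward algorithm, measure the workspace it wants, and if that would not fit in the free GPU memory reported by cudaMemGetInfo(), re-query with the no-workspace preference. This is an illustration, not code from the repository: it assumes cuDNN 5-7, where cudnnGetConvolutionForwardAlgorithm() and the CUDNN_CONVOLUTION_FWD_PREFER_FASTEST / CUDNN_CONVOLUTION_FWD_NO_WORKSPACE preferences still exist (they were removed in cuDNN 8), the helper name pick_forward_algo is made up, and only the forward pass is shown, whereas the patch applies the same fallback to the backward-data and backward-filter algorithms as well.

#include <stdio.h>
#include <cuda_runtime.h>
#include <cudnn.h>

/* Hypothetical helper (not part of darknet): choose a cuDNN forward
 * convolution algorithm, falling back to a workspace-free one when the
 * fastest algorithm's workspace would not fit in free GPU memory. */
cudnnConvolutionFwdAlgo_t pick_forward_algo(cudnnHandle_t handle,
                                            cudnnTensorDescriptor_t src,
                                            cudnnFilterDescriptor_t weights,
                                            cudnnConvolutionDescriptor_t conv,
                                            cudnnTensorDescriptor_t dst)
{
    cudnnConvolutionFwdAlgo_t algo;
    size_t workspace = 0, free_byte = 0, total_byte = 0;

    /* First preference: let cuDNN pick the fastest algorithm. The memory
       limit argument is only consulted for SPECIFY_WORKSPACE_LIMIT, so 0
       is passed here. */
    cudnnGetConvolutionForwardAlgorithm(handle, src, weights, conv, dst,
                                        CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
                                        0, &algo);
    cudnnGetConvolutionForwardWorkspaceSize(handle, src, weights, conv, dst,
                                            algo, &workspace);
    cudaMemGetInfo(&free_byte, &total_byte);

    /* Same heuristic as the patch: if the workspace exceeds the free GPU RAM
       or would take at least half the card, drop the workspace entirely. */
    if (workspace > free_byte || workspace >= total_byte / 2) {
        printf(" used slow CUDNN algo without Workspace! \n");
        cudnnGetConvolutionForwardAlgorithm(handle, src, weights, conv, dst,
                                            CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
                                            0, &algo);
    }
    return algo;
}

In the patch itself the check runs in resize_convolutional_layer(): the layer is first configured with cudnn_fastest, get_workspace_size() reports the combined workspace of all three algorithms, and only if that total fails the memory check is the setup redone with cudnn_smallest.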