mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Accelerated by another 5% using FP16/32 Batch-norm for Tensor Cores.
This commit is contained in:
@ -178,6 +178,8 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
|
||||
// batch norm
|
||||
cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
|
||||
cudnnSetTensor4dDescriptor(l->normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
|
||||
cudnnSetTensor4dDescriptor(l->normDstTensorDescF16, CUDNN_TENSOR_NCHW, data_type, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
#if(CUDNN_MAJOR >= 6)
|
||||
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); // cudnn >= 6.0
|
||||
#else
|
||||
@ -379,6 +381,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
||||
}
|
||||
#ifdef CUDNN
|
||||
cudnnCreateTensorDescriptor(&l.normDstTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.normDstTensorDescF16);
|
||||
cudnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.srcTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
|
Reference in New Issue
Block a user