Accelerated by another 5% using FP16/32 Batch-norm for Tensor Cores.

This commit is contained in:
AlexeyAB
2018-04-17 02:51:11 +03:00
parent 701f4fab63
commit 9bae70b225
5 changed files with 105 additions and 20 deletions

View File

@@ -281,7 +281,7 @@ struct layer{
#ifdef CUDNN
cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc;
cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc;
cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc;
cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc, normDstTensorDescF16;
cudnnFilterDescriptor_t weightDesc;
cudnnFilterDescriptor_t dweightDesc;
cudnnConvolutionDescriptor_t convDesc;