CUDA minor performance improvement

This commit is contained in:
AlexeyAB
2019-01-16 18:08:11 +03:00
parent 4c05166215
commit 5343aa4235
7 changed files with 86 additions and 14 deletions

View File

@@ -883,7 +883,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
//gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
//gemm_nn_custom(m, n, k, 1, a, k, b, n, c, n);
if (l.xnor && l.align_bit_weights && !state.train && (l.stride == 1 && l.pad == 1))
if (l.xnor && l.align_bit_weights && !state.train)
{
memset(b, 0, l.bit_align*l.size*l.size*l.c * sizeof(float));