Try to use avx_hs() - it is slow and requires 4096-bit alignment, so it is used only when (l.size*l.size*l.c) > 4096 bits

It may be faster only for sizes of 8192 bits or more.
This commit is contained in:
AlexeyAB
2018-08-08 19:07:10 +03:00
parent 0a326e7afe
commit a284a7da8d
4 changed files with 978 additions and 870 deletions

View File

@ -862,6 +862,8 @@ void calculate_binary_weights(network net)
if (l->xnor) {
//printf("\n %d \n", j);
size_t ldb_align = 256; // 256bit for AVX2
if (l->size*l->size*l->c > 4096) ldb_align = 4096;
binary_transpose_align_weights(l, ldb_align);
}
}