Temporary Slow implementation of XNOR on CUDA (shared_memory)

This commit is contained in:
AlexeyAB
2018-09-01 00:03:25 +03:00
parent c4a9e3422e
commit 007878393f
6 changed files with 187 additions and 101 deletions

View File

@ -629,6 +629,7 @@ void binary_align_weights(convolutional_layer *l)
cudaError_t status;
l->align_workspace_size = l->bit_align * l->size * l->size * l->c;
status = cudaMalloc((void **)&l->align_workspace_gpu, l->align_workspace_size * sizeof(float));
status = cudaMalloc((void **)&l->transposed_align_workspace_gpu, l->align_workspace_size * sizeof(float));
check_error(status);
//l->align_bit_weights_gpu = cuda_make_array(l->align_bit_weights, l->align_bit_weights_size * sizeof(char)/sizeof(float));
@ -638,6 +639,7 @@ void binary_align_weights(convolutional_layer *l)
check_error(status);
l->mean_arr_gpu = cuda_make_array(l->mean_arr, l->n);
cudaDeviceSynchronize();
#endif // GPU
free(align_weights);