Try to fuse conv_xnor+shortcut -> conv_xnor

This commit is contained in:
AlexeyAB
2019-02-12 02:05:15 +03:00
parent 9e138adf09
commit 5448e07445
5 changed files with 51 additions and 18 deletions

View File

@@ -300,7 +300,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
//start_timer();
gemm_nn_custom_bin_mean_transposed_gpu(m, n, k,
(unsigned char *)l.align_bit_weights_gpu, new_ldb, (unsigned char *)l.transposed_align_workspace_gpu,
-new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY);
+new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY,
+l.bin_conv_shortcut_in_gpu, l.bin_conv_shortcut_out_gpu);
//cudaDeviceSynchronize();
//stop_timer_and_show_name("gemm_nn_custom_bin_mean_transposed_gpu");
@@ -370,7 +371,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
//start_timer();
gemm_nn_custom_bin_mean_transposed_gpu(m, n, k,
(unsigned char *)l.align_bit_weights_gpu, new_ldb, (unsigned char *)l.transposed_align_workspace_gpu,
-new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY);
+new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY,
+l.bin_conv_shortcut_in_gpu, l.bin_conv_shortcut_out_gpu);
//cudaDeviceSynchronize();
//stop_timer_and_show_name("gemm_nn_custom_bin_mean_transposed_gpu");
//}