Mirror of https://github.com/pjreddie/darknet.git (synced 2023-08-10 21:13:14 +03:00)
Try to fuse conv_xnor+shortcut -> conv_xnor
This commit is contained in:
@ -300,7 +300,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
||||
//start_timer();
|
||||
gemm_nn_custom_bin_mean_transposed_gpu(m, n, k,
|
||||
(unsigned char *)l.align_bit_weights_gpu, new_ldb, (unsigned char *)l.transposed_align_workspace_gpu,
|
||||
- new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY);
|
||||
+ new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY,
|
||||
+ l.bin_conv_shortcut_in_gpu, l.bin_conv_shortcut_out_gpu);
|
||||
//cudaDeviceSynchronize();
|
||||
//stop_timer_and_show_name("gemm_nn_custom_bin_mean_transposed_gpu");
|
||||
|
||||
@ -370,7 +371,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
||||
//start_timer();
|
||||
gemm_nn_custom_bin_mean_transposed_gpu(m, n, k,
|
||||
(unsigned char *)l.align_bit_weights_gpu, new_ldb, (unsigned char *)l.transposed_align_workspace_gpu,
|
||||
- new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY);
|
||||
+ new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY,
|
||||
+ l.bin_conv_shortcut_in_gpu, l.bin_conv_shortcut_out_gpu);
|
||||
//cudaDeviceSynchronize();
|
||||
//stop_timer_and_show_name("gemm_nn_custom_bin_mean_transposed_gpu");
|
||||
//}
|
||||
|
Reference in New Issue
Block a user