Fixed multi-GPU training for Tensor Cores

2023-08-10 21:13:14 +03:00 · 2018-03-09 19:44:46 +03:00
parent a6c51e3b75
commit 880cf187d8
5 changed files with 48 additions and 35 deletions
--- a/src/network.c
+++ b/src/network.c
@@ -140,6 +140,11 @@ network make_network(int n)
    #ifdef GPU
    net.input_gpu = calloc(1, sizeof(float *));
    net.truth_gpu = calloc(1, sizeof(float *));
+
+	net.input16_gpu = calloc(1, sizeof(float *));
+	net.output16_gpu = calloc(1, sizeof(float *));
+	net.max_input16_size = calloc(1, sizeof(size_t));
+	net.max_output16_size = calloc(1, sizeof(size_t));
    #endif
    return net;
 }
@@ -622,6 +627,13 @@ void free_network(network net)
 	if (*net.truth_gpu) cuda_free(*net.truth_gpu);
 	if (net.input_gpu) free(net.input_gpu);
 	if (net.truth_gpu) free(net.truth_gpu);
+
+	if (*net.input16_gpu) cuda_free(*net.input16_gpu);
+	if (*net.output16_gpu) cuda_free(*net.output16_gpu);
+	if (net.input16_gpu) free(net.input16_gpu);
+	if (net.output16_gpu) free(net.output16_gpu);
+	if (net.max_input16_size) free(net.max_input16_size);
+	if (net.max_output16_size) free(net.max_output16_size);
 #else
 	free(net.workspace);
 #endif