get_connected_workspace_size() and get_convolutional_workspace_size()

2023-08-10 21:13:14 +03:00 · 2019-02-08 00:51:20 +03:00
parent 6832290eee
commit 9e07605bc5
3 changed files with 16 additions and 11 deletions
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@ -11,8 +11,11 @@
 #include <stdlib.h>
 #include <string.h>
-static size_t get_connected_workspace_size(layer l) {
+size_t get_connected_workspace_size(layer l)
 {
 #ifdef CUDNN
    return get_convolutional_workspace_size(l);
    /*
    if (gpu_index >= 0) {
        size_t most = 0;
        size_t s = 0;
@ -42,6 +45,7 @@ static size_t get_connected_workspace_size(layer l) {
        if (s > most) most = s;
        return most;
    }
    */
 #endif
    return 0;
 }
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@ -100,7 +100,7 @@ image get_convolutional_delta(convolutional_layer l)
    return float_to_image(w,h,c,l.delta);
 }
-size_t get_workspace_size(layer l){
+size_t get_workspace_size32(layer l){
 #ifdef CUDNN
    if(gpu_index >= 0){
        size_t most = 0;
@ -173,6 +173,12 @@ size_t get_workspace_size16(layer l) {
    //return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float);
 }
 /* Returns the larger of the two sizes reported for layer `l` by
  * get_workspace_size32() and get_workspace_size16(). */
 size_t get_convolutional_workspace_size(layer l) {
    const size_t size32 = get_workspace_size32(l);
    const size_t size16 = get_workspace_size16(l);
    /* Ties resolve to size32, which is the same value either way. */
    return (size16 > size32) ? size16 : size32;
 }
 #ifdef GPU
 #ifdef CUDNN
 void create_convolutional_cudnn_tensors(layer *l)
@ -462,9 +468,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
 #endif
    }
 #endif
-    l.workspace_size = get_workspace_size(l);
+    l.workspace_size = get_convolutional_workspace_size(l);
    size_t workspace_size16 = get_workspace_size16(l);
    if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16;
    //fprintf(stderr, "conv  %5d %2d x%2d /%2d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
    l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
@ -566,9 +570,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
    cudnn_convolutional_setup(l, cudnn_fastest);
 #endif
 #endif
-    l->workspace_size = get_workspace_size(*l);
+    l->workspace_size = get_convolutional_workspace_size(*l);
    size_t workspace_size16 = get_workspace_size16(*l);
    if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16;
 #ifdef CUDNN
    // check for excessive memory consumption
@ -578,9 +580,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
    if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
        printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2);
        cudnn_convolutional_setup(l, cudnn_smallest);
-        l->workspace_size = get_workspace_size(*l);
+        l->workspace_size = get_convolutional_workspace_size(*l);
        size_t workspace_size16 = get_workspace_size16(*l);
        if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16;
    }
 #endif
 }
--- a/src/convolutional_layer.h
+++ b/src/convolutional_layer.h
@ -22,6 +22,7 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
 #ifdef CUDNN
 void cudnn_convolutional_setup(layer *l, int cudnn_preference);
 void create_convolutional_cudnn_tensors(layer *l);
 size_t get_convolutional_workspace_size(layer l);
 void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
 #endif
 #endif