mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
get_connected_workspace_size() and get_convolutional_workspace_size()
This commit is contained in:
@ -11,8 +11,11 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static size_t get_connected_workspace_size(layer l) {
|
||||
size_t get_connected_workspace_size(layer l)
|
||||
{
|
||||
#ifdef CUDNN
|
||||
return get_convolutional_workspace_size(l);
|
||||
/*
|
||||
if (gpu_index >= 0) {
|
||||
size_t most = 0;
|
||||
size_t s = 0;
|
||||
@ -42,6 +45,7 @@ static size_t get_connected_workspace_size(layer l) {
|
||||
if (s > most) most = s;
|
||||
return most;
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
@ -100,7 +100,7 @@ image get_convolutional_delta(convolutional_layer l)
|
||||
return float_to_image(w,h,c,l.delta);
|
||||
}
|
||||
|
||||
size_t get_workspace_size(layer l){
|
||||
size_t get_workspace_size32(layer l){
|
||||
#ifdef CUDNN
|
||||
if(gpu_index >= 0){
|
||||
size_t most = 0;
|
||||
@ -173,6 +173,12 @@ size_t get_workspace_size16(layer l) {
|
||||
//return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float);
|
||||
}
|
||||
|
||||
/*
 * Workspace size for a convolutional layer.
 *
 * Queries get_workspace_size32() first and get_workspace_size16() second,
 * then returns whichever of the two results is larger, so that a single
 * buffer of the returned size satisfies both.
 *
 * l: layer description, passed by value and forwarded unchanged to both
 *    helpers.
 */
size_t get_convolutional_workspace_size(layer l) {
    const size_t size32 = get_workspace_size32(l);
    const size_t size16 = get_workspace_size16(l);

    return (size16 > size32) ? size16 : size32;
}
|
||||
#ifdef GPU
|
||||
#ifdef CUDNN
|
||||
void create_convolutional_cudnn_tensors(layer *l)
|
||||
@ -462,9 +468,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
l.workspace_size = get_workspace_size(l);
|
||||
size_t workspace_size16 = get_workspace_size16(l);
|
||||
if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16;
|
||||
l.workspace_size = get_convolutional_workspace_size(l);
|
||||
|
||||
//fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
|
||||
l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
|
||||
@ -566,9 +570,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
|
||||
cudnn_convolutional_setup(l, cudnn_fastest);
|
||||
#endif
|
||||
#endif
|
||||
l->workspace_size = get_workspace_size(*l);
|
||||
size_t workspace_size16 = get_workspace_size16(*l);
|
||||
if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16;
|
||||
l->workspace_size = get_convolutional_workspace_size(*l);
|
||||
|
||||
#ifdef CUDNN
|
||||
// check for excessive memory consumption
|
||||
@ -578,9 +580,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
|
||||
if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
|
||||
printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2);
|
||||
cudnn_convolutional_setup(l, cudnn_smallest);
|
||||
l->workspace_size = get_workspace_size(*l);
|
||||
size_t workspace_size16 = get_workspace_size16(*l);
|
||||
if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16;
|
||||
l->workspace_size = get_convolutional_workspace_size(*l);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
|
||||
/* Declarations that are only available when building with CUDNN defined. */
#ifdef CUDNN
|
||||
/*
 * Set up cuDNN for layer `l` using the given preference. Callers visible in
 * this change pass cudnn_fastest, and fall back to cudnn_smallest when the
 * resulting workspace does not fit in available GPU memory.
 */
void cudnn_convolutional_setup(layer *l, int cudnn_preference);
|
||||
/* NOTE(review): presumably creates the cuDNN descriptors owned by `l`; body not visible here - confirm. */
void create_convolutional_cudnn_tensors(layer *l);
|
||||
/* Returns the larger of get_workspace_size32(l) and get_workspace_size16(l). */
size_t get_convolutional_workspace_size(layer l);
|
||||
/*
 * NOTE(review): by its name, converts `size` float32 values from `input_f32`
 * into float16 values in `output_f16` (declared as float*, so presumably used
 * as raw storage) - confirm against the definition.
 */
void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
|
||||
#endif
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user