mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
get_connected_workspace_size() and get_convolutional_workspace_size()
This commit is contained in:
@ -11,8 +11,11 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
static size_t get_connected_workspace_size(layer l) {
|
size_t get_connected_workspace_size(layer l)
|
||||||
|
{
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
|
return get_convolutional_workspace_size(l);
|
||||||
|
/*
|
||||||
if (gpu_index >= 0) {
|
if (gpu_index >= 0) {
|
||||||
size_t most = 0;
|
size_t most = 0;
|
||||||
size_t s = 0;
|
size_t s = 0;
|
||||||
@ -42,6 +45,7 @@ static size_t get_connected_workspace_size(layer l) {
|
|||||||
if (s > most) most = s;
|
if (s > most) most = s;
|
||||||
return most;
|
return most;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -100,7 +100,7 @@ image get_convolutional_delta(convolutional_layer l)
|
|||||||
return float_to_image(w,h,c,l.delta);
|
return float_to_image(w,h,c,l.delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t get_workspace_size(layer l){
|
size_t get_workspace_size32(layer l){
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
if(gpu_index >= 0){
|
if(gpu_index >= 0){
|
||||||
size_t most = 0;
|
size_t most = 0;
|
||||||
@ -173,6 +173,12 @@ size_t get_workspace_size16(layer l) {
|
|||||||
//return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float);
|
//return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t get_convolutional_workspace_size(layer l) {
|
||||||
|
size_t workspace_size = get_workspace_size32(l);
|
||||||
|
size_t workspace_size16 = get_workspace_size16(l);
|
||||||
|
if (workspace_size16 > workspace_size) workspace_size = workspace_size16;
|
||||||
|
return workspace_size;
|
||||||
|
}
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
void create_convolutional_cudnn_tensors(layer *l)
|
void create_convolutional_cudnn_tensors(layer *l)
|
||||||
@ -462,9 +468,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
l.workspace_size = get_workspace_size(l);
|
l.workspace_size = get_convolutional_workspace_size(l);
|
||||||
size_t workspace_size16 = get_workspace_size16(l);
|
|
||||||
if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16;
|
|
||||||
|
|
||||||
//fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
|
//fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
|
||||||
l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
|
l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
|
||||||
@ -566,9 +570,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
|
|||||||
cudnn_convolutional_setup(l, cudnn_fastest);
|
cudnn_convolutional_setup(l, cudnn_fastest);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
l->workspace_size = get_workspace_size(*l);
|
l->workspace_size = get_convolutional_workspace_size(*l);
|
||||||
size_t workspace_size16 = get_workspace_size16(*l);
|
|
||||||
if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16;
|
|
||||||
|
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
// check for excessive memory consumption
|
// check for excessive memory consumption
|
||||||
@ -578,9 +580,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
|
|||||||
if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
|
if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
|
||||||
printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2);
|
printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2);
|
||||||
cudnn_convolutional_setup(l, cudnn_smallest);
|
cudnn_convolutional_setup(l, cudnn_smallest);
|
||||||
l->workspace_size = get_workspace_size(*l);
|
l->workspace_size = get_convolutional_workspace_size(*l);
|
||||||
size_t workspace_size16 = get_workspace_size16(*l);
|
|
||||||
if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
|
|||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
void cudnn_convolutional_setup(layer *l, int cudnn_preference);
|
void cudnn_convolutional_setup(layer *l, int cudnn_preference);
|
||||||
void create_convolutional_cudnn_tensors(layer *l);
|
void create_convolutional_cudnn_tensors(layer *l);
|
||||||
|
size_t get_convolutional_workspace_size(layer l);
|
||||||
void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
|
void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user