Report per-layer and total BFLOPS while parsing a network cfg.

- make_convolutional_layer and make_maxpool_layer store an operation
  estimate in l.bflops and append it to their stderr summary line.
- struct layer gains the `float bflops` field.
- parse_network_cfg_custom sums the positive l.bflops values and prints
  the total on stderr, next to the rest of the layer table.

NOTE(review): leading whitespace (and any runs of spaces inside string
literals) could not be recovered from the flattened patch text; the
four-space indentation below is reconstructed -- verify against the
target tree before applying.

diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index b8065fdf..a3494541 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -397,7 +397,8 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
     l.workspace_size = get_workspace_size(l);
     l.activation = activation;
 
-    fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
+    l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
+    fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
 
     return l;
 }
diff --git a/src/layer.h b/src/layer.h
index 81e27adf..8a58c927 100644
--- a/src/layer.h
+++ b/src/layer.h
@@ -113,6 +113,7 @@ struct layer{
     int tanh;
     int *mask;
     int total;
+    float bflops;
 
     int adam;
     float B1;
diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c
index 031d116c..41f7a793 100644
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@@ -47,7 +47,8 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
     l.output_gpu = cuda_make_array(l.output, output_size);
     l.delta_gpu = cuda_make_array(l.delta, output_size);
     #endif
-    fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
+    l.bflops = ((double)l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; // widen first: the all-int product can overflow
+    fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
     return l;
 }
 
diff --git a/src/parser.c b/src/parser.c
index 7441ae21..2b013de9 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -712,6 +712,7 @@ network parse_network_cfg_custom(char *filename, int batch)
     params.time_steps = net.time_steps;
     params.net = net;
 
+    float bflops = 0;
     size_t workspace_size = 0;
     n = n->next;
     int count = 0;
@@ -719,7 +720,7 @@ network parse_network_cfg_custom(char *filename, int batch)
     fprintf(stderr, "layer filters size input output\n");
     while(n){
         params.index = count;
-        fprintf(stderr, "%5d ", count);
+        fprintf(stderr, "%4d ", count);
         s = (section *)n->val;
         options = s->options;
         layer l = {0};
@@ -796,10 +797,12 @@ network parse_network_cfg_custom(char *filename, int batch)
             params.c = l.out_c;
             params.inputs = l.outputs;
         }
+        if (l.bflops > 0) bflops += l.bflops;
     }
     free_list(sections);
     net.outputs = get_network_output_size(net);
     net.output = get_network_output(net);
+    fprintf(stderr, "Total BFLOPS %5.3f \n", bflops); // stderr, like the per-layer table above
     if(workspace_size){
         //printf("%ld\n", workspace_size);
 #ifdef GPU