Delay the cudnn_half path during training until the iteration count exceeds
2*burn_in (previously burn_in) in both the forward and the backward
convolutional GPU pass, and keep the "Tensor Cores are disabled" status
message in train_detector on the same 2*burn_in threshold, so the log does
not report "Tensor Cores are used." while the kernels are still gated off.

diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 10a17baf..12c88c4d 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -295,7 +295,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
 //#ifdef CUDNN_HALF
     //if (state.use_mixed_precision) {
     int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
-    if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > state.net.burn_in))
+    if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 2*state.net.burn_in))
     {
         //printf("\n CUDNN_HALF!!! state.index = %d \n", state.index);
 
@@ -475,7 +475,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
 
 //#ifdef CUDNN_HALF
     int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
-    if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > state.net.burn_in))
+    if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 2*state.net.burn_in))
     {
 
         const size_t input16_size = l.batch*l.c*l.w*l.h;
diff --git a/src/detector.c b/src/detector.c
index 4841041a..bee8ef17 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -222,7 +222,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
 
         i = get_current_batch(net);
         if (net.cudnn_half) {
-            if (i < net.burn_in) printf("\n Tensor Cores are disabled until the first %d iterations are reached.", net.burn_in);
+            if (i < 2*net.burn_in) printf("\n Tensor Cores are disabled until the first %d iterations are reached.", 2*net.burn_in);
             else printf("\n Tensor Cores are used.");
         }
         printf("\n %d: %f, %f avg loss, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), (what_time_is_it_now()-time), i*imgs);