mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Optimized memory consumption during Training with -map flag
This commit is contained in:
115
src/detector.c
115
src/detector.c
@ -44,6 +44,36 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
||||
char *valid_images = option_find_str(options, "valid", train_images);
|
||||
char *backup_directory = option_find_str(options, "backup", "/backup/");
|
||||
|
||||
int train_images_num = 0;
|
||||
network net_map;
|
||||
if (calc_map) {
|
||||
FILE* valid_file = fopen(valid_images, "r");
|
||||
if (!valid_file) {
|
||||
printf("\n Error: There is no %s file for mAP calculation!\n Don't use -map flag.\n Or set valid=%s in your %s file. \n", valid_images, train_images, datacfg);
|
||||
getchar();
|
||||
exit(-1);
|
||||
}
|
||||
else fclose(valid_file);
|
||||
list *plist = get_paths(train_images);
|
||||
train_images_num = plist->size;
|
||||
free_list(plist);
|
||||
|
||||
cuda_set_device(gpus[0]);
|
||||
printf(" Prepare additional network for mAP calculation...\n");
|
||||
net_map = parse_network_cfg_custom(cfgfile, 1);
|
||||
int k; // free memory unnecessary arrays
|
||||
for (k = 0; k < net_map.n; ++k) {
|
||||
free_layer(net_map.layers[k]);
|
||||
}
|
||||
#ifdef GPU
|
||||
cuda_free(net_map.workspace);
|
||||
if (*net_map.input16_gpu) cuda_free(*net_map.input16_gpu);
|
||||
if (*net_map.output16_gpu) cuda_free(*net_map.output16_gpu);
|
||||
#else
|
||||
free(net_map.workspace);
|
||||
#endif
|
||||
}
|
||||
|
||||
srand(time(0));
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
@ -68,44 +98,6 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
||||
srand(time(0));
|
||||
network net = nets[0];
|
||||
|
||||
int train_images_num = 0;
|
||||
network net_map;
|
||||
if (calc_map) {
|
||||
FILE* valid_file = fopen(valid_images, "r");
|
||||
if (!valid_file) {
|
||||
printf("\n Error: There is no %s file for mAP calculation!\n Don't use -map flag.\n Or set valid=%s in your %s file. \n", valid_images, train_images, datacfg);
|
||||
getchar();
|
||||
exit(-1);
|
||||
}
|
||||
else fclose(valid_file);
|
||||
list *plist = get_paths(train_images);
|
||||
train_images_num = plist->size;
|
||||
free_list(plist);
|
||||
|
||||
cuda_set_device(net.gpu_index);
|
||||
printf(" Prepare additional network for mAP calculation...\n");
|
||||
net_map = parse_network_cfg_custom(cfgfile, 1);
|
||||
int k;
|
||||
for (k = 0; k < net.n; ++k) {
|
||||
layer l = net.layers[k];
|
||||
if (l.type == CONVOLUTIONAL) {
|
||||
net_map.layers[k].biases = l.biases;
|
||||
net_map.layers[k].scales = l.scales;
|
||||
net_map.layers[k].rolling_mean = l.rolling_mean;
|
||||
net_map.layers[k].rolling_variance = l.rolling_variance;
|
||||
net_map.layers[k].weights = l.weights;
|
||||
#ifdef GPU
|
||||
net_map.layers[k].biases_gpu = l.biases_gpu;
|
||||
net_map.layers[k].scales_gpu = l.scales_gpu;
|
||||
net_map.layers[k].rolling_mean_gpu = l.rolling_mean_gpu;
|
||||
net_map.layers[k].rolling_variance_gpu = l.rolling_variance_gpu;
|
||||
net_map.layers[k].weights_gpu = l.weights_gpu;
|
||||
net_map.layers[k].weights_gpu16 = l.weights_gpu16;
|
||||
#endif // GPU
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const int actual_batch_size = net.batch * net.subdivisions;
|
||||
if (actual_batch_size == 1) {
|
||||
printf("\n Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64 \n");
|
||||
@ -255,8 +247,50 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
||||
int draw_precision = 0;
|
||||
int calc_map_for_each = 4 * train_images_num / (net.batch * net.subdivisions);
|
||||
if (calc_map && (i >= (iter_map + calc_map_for_each) || i == net.max_batches) && i >= net.burn_in && i >= 1000) {
|
||||
if (l.random) {
|
||||
printf("Resizing to initial size: %d x %d \n", init_w, init_h);
|
||||
args.w = init_w;
|
||||
args.h = init_h;
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data(args);
|
||||
int k;
|
||||
for (k = 0; k < ngpus; ++k) {
|
||||
resize_network(nets + k, init_w, init_h);
|
||||
}
|
||||
net = nets[0];
|
||||
}
|
||||
|
||||
// combine Training and Validation networks
|
||||
network net_combined = make_network(net.n);
|
||||
layer *old_layers = net_combined.layers;
|
||||
net_combined = net;
|
||||
net_combined.layers = old_layers;
|
||||
net_combined.batch = 1;
|
||||
|
||||
int k;
|
||||
for (k = 0; k < net.n; ++k) {
|
||||
layer *l = &(net.layers[k]);
|
||||
net_combined.layers[k] = net.layers[k];
|
||||
net_combined.layers[k].batch = 1;
|
||||
|
||||
if (l->type == CONVOLUTIONAL) {
|
||||
#ifdef CUDNN
|
||||
net_combined.layers[k].normTensorDesc = net_map.layers[k].normTensorDesc;
|
||||
net_combined.layers[k].normDstTensorDesc = net_map.layers[k].normDstTensorDesc;
|
||||
net_combined.layers[k].normDstTensorDescF16 = net_map.layers[k].normDstTensorDescF16;
|
||||
|
||||
net_combined.layers[k].srcTensorDesc = net_map.layers[k].srcTensorDesc;
|
||||
net_combined.layers[k].dstTensorDesc = net_map.layers[k].dstTensorDesc;
|
||||
|
||||
net_combined.layers[k].srcTensorDesc16 = net_map.layers[k].srcTensorDesc16;
|
||||
net_combined.layers[k].dstTensorDesc16 = net_map.layers[k].dstTensorDesc16;
|
||||
#endif // CUDNN
|
||||
}
|
||||
}
|
||||
|
||||
iter_map = i;
|
||||
mean_average_precision = validate_detector_map(datacfg, cfgfile, weightfile, 0.25, 0.5, &net_map);
|
||||
mean_average_precision = validate_detector_map(datacfg, cfgfile, weightfile, 0.25, 0.5, &net_combined);
|
||||
printf("\n mean_average_precision = %f \n", mean_average_precision);
|
||||
draw_precision = 1;
|
||||
}
|
||||
@ -312,8 +346,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
||||
free_list_contents_kvp(options);
|
||||
free_list(options);
|
||||
|
||||
for (i = 0; i < ngpus; ++i) free_network(nets[i]);
|
||||
free(nets);
|
||||
free_network(net);
|
||||
//free_network(net);
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user