mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Added sequential subdivisions
This commit is contained in:
@ -559,6 +559,7 @@ typedef struct network {
|
|||||||
int time_steps;
|
int time_steps;
|
||||||
int step;
|
int step;
|
||||||
int max_batches;
|
int max_batches;
|
||||||
|
float *seq_scales;
|
||||||
float *scales;
|
float *scales;
|
||||||
int *steps;
|
int *steps;
|
||||||
int num_steps;
|
int num_steps;
|
||||||
@ -591,6 +592,7 @@ typedef struct network {
|
|||||||
int track;
|
int track;
|
||||||
int augment_speed;
|
int augment_speed;
|
||||||
int sequential_subdivisions;
|
int sequential_subdivisions;
|
||||||
|
int init_sequential_subdivisions;
|
||||||
int current_subdivision;
|
int current_subdivision;
|
||||||
int try_fix_nan;
|
int try_fix_nan;
|
||||||
|
|
||||||
|
1162
src/conv_lstm_layer.c
Normal file
1162
src/conv_lstm_layer.c
Normal file
File diff suppressed because it is too large
Load Diff
33
src/conv_lstm_layer.h
Normal file
33
src/conv_lstm_layer.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
#ifndef CONV_LSTM_LAYER_H
#define CONV_LSTM_LAYER_H

/*
 * Declarations for the convolutional LSTM layer.
 * This header only declares the entry points; the definitions are not
 * visible here (presumably src/conv_lstm_layer.c -- confirm).
 */

#include "activations.h"
#include "layer.h"
#include "network.h"
/* NOTE(review): USET is defined unconditionally; its consumers are not
 * visible from this header -- confirm before changing or removing it. */
#define USET

#ifdef __cplusplus
extern "C" {
#endif

/* Layer construction and spatial resize. */
layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters,
                           int steps, int size, int stride, int pad,
                           ACTIVATION activation, int batch_normalize,
                           int peephole, int xnor);
void resize_conv_lstm_layer(layer *l, int w, int h);

/* Recurrent-state helpers (free / randomize / remember / restore).
 * Exact semantics are defined by the implementation, not by this header. */
void free_state_conv_lstm(layer l);
void randomize_state_conv_lstm(layer l);
void remember_state_conv_lstm(layer l);
void restore_state_conv_lstm(layer l);

/* CPU forward, backward and weight-update entry points. */
void forward_conv_lstm_layer(layer l, network_state state);
void backward_conv_lstm_layer(layer l, network_state state);
void update_conv_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay);

#ifdef GPU
/* GPU counterparts, only declared when the build defines GPU. */
void forward_conv_lstm_layer_gpu(layer l, network_state state);
void backward_conv_lstm_layer_gpu(layer l, network_state state);
void update_conv_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
#endif

#ifdef __cplusplus
}
#endif

#endif  // CONV_LSTM_LAYER_H
|
@ -191,6 +191,11 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||||||
time = what_time_is_it_now();
|
time = what_time_is_it_now();
|
||||||
pthread_join(load_thread, 0);
|
pthread_join(load_thread, 0);
|
||||||
train = buffer;
|
train = buffer;
|
||||||
|
if (net.track) {
|
||||||
|
net.sequential_subdivisions = get_current_seq_subdivisions(net);
|
||||||
|
args.threads = net.sequential_subdivisions * ngpus;
|
||||||
|
printf(" sequential_subdivisions = %d \n", net.sequential_subdivisions);
|
||||||
|
}
|
||||||
load_thread = load_data(args);
|
load_thread = load_data(args);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -662,7 +667,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
|
|||||||
char *train_images = option_find_str(options, "train", "data/train.txt");
|
char *train_images = option_find_str(options, "train", "data/train.txt");
|
||||||
valid_images = option_find_str(options, "valid", train_images);
|
valid_images = option_find_str(options, "valid", train_images);
|
||||||
net = *existing_net;
|
net = *existing_net;
|
||||||
remember_network_recurrent_state(*existing_net);
|
//remember_network_recurrent_state(*existing_net);
|
||||||
free_network_recurrent_state(*existing_net);
|
free_network_recurrent_state(*existing_net);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -1073,7 +1078,8 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
|
|||||||
if (existing_net) {
|
if (existing_net) {
|
||||||
//set_batch_network(&net, initial_batch);
|
//set_batch_network(&net, initial_batch);
|
||||||
//free_network_recurrent_state(*existing_net);
|
//free_network_recurrent_state(*existing_net);
|
||||||
restore_network_recurrent_state(*existing_net);
|
//restore_network_recurrent_state(*existing_net);
|
||||||
|
randomize_network_recurrent_state(*existing_net);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
free_network(net);
|
free_network(net);
|
||||||
|
@ -91,6 +91,30 @@ void reset_rnn(network *net)
|
|||||||
reset_network_state(net, 0);
|
reset_network_state(net, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Returns the sequential-subdivision count that applies at the current
 * training batch.
 *
 * Starts from net.init_sequential_subdivisions and, when a learning-rate
 * policy is active, multiplies it by net.seq_scales[i] for every step
 * net.steps[i] that has already been reached (steps[i] <= current batch).
 * Each multiplication is truncated back to int, exactly as the original
 * `int *= float` compound assignment did.
 *
 * The result is never less than 1: repeated truncation (e.g. 1 * 0.5) would
 * otherwise yield 0, and callers that use the value as a multiplier for a
 * worker/thread count would end up with zero workers.
 *
 * The return type stays float for compatibility with the existing
 * prototype; the value is always integral.
 */
float get_current_seq_subdivisions(network net)
{
    int sequence_subdivisions = net.init_sequential_subdivisions;

    /* The step tables are only walked when both arrays exist; a policy that
     * never allocated them leaves the initial value untouched. */
    if (net.policy && net.steps && net.seq_scales)
    {
        int batch_num = get_current_batch(net);
        int i;
        for (i = 0; i < net.num_steps; ++i) {
            if (net.steps[i] > batch_num) break;
            /* Explicit truncating conversion (same arithmetic as before). */
            sequence_subdivisions = (int)(sequence_subdivisions * net.seq_scales[i]);
        }
    }

    /* Guard against a scale chain that truncates to zero or below. */
    if (sequence_subdivisions < 1) sequence_subdivisions = 1;

    return sequence_subdivisions;
}
|
||||||
|
|
||||||
|
/*
 * Computes net.subdivisions / net.sequential_subdivisions (integer division),
 * with a floor of 1.
 *
 * Returns 1 when net.sequential_subdivisions is 0 (no division is attempted)
 * or when the quotient is smaller than 1.
 */
int get_sequence_value(network net)
{
    int quotient;

    /* Zero divisor: fall back to the neutral value. */
    if (net.sequential_subdivisions == 0) {
        return 1;
    }

    quotient = net.subdivisions / net.sequential_subdivisions;
    return (quotient < 1) ? 1 : quotient;
}
|
||||||
|
|
||||||
float get_current_rate(network net)
|
float get_current_rate(network net)
|
||||||
{
|
{
|
||||||
int batch_num = get_current_batch(net);
|
int batch_num = get_current_batch(net);
|
||||||
@ -928,6 +952,7 @@ void free_network(network net)
|
|||||||
}
|
}
|
||||||
free(net.layers);
|
free(net.layers);
|
||||||
|
|
||||||
|
free(net.seq_scales);
|
||||||
free(net.scales);
|
free(net.scales);
|
||||||
free(net.steps);
|
free(net.steps);
|
||||||
free(net.seen);
|
free(net.seen);
|
||||||
@ -1124,6 +1149,15 @@ void free_network_recurrent_state(network net)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Walks every layer of the network and resets its recurrent state according
 * to the layer type:
 *   - CONV_LSTM layers are passed to randomize_state_conv_lstm();
 *   - CRNN layers are passed to free_state_crnn().
 * All other layer types are left untouched.
 *
 * NOTE(review): CRNN layers are handed to the *free* routine rather than a
 * randomize routine -- confirm this is intentional.
 */
void randomize_network_recurrent_state(network net)
{
    int idx = 0;

    while (idx < net.n) {
        switch (net.layers[idx].type) {
        case CONV_LSTM:
            randomize_state_conv_lstm(net.layers[idx]);
            break;
        case CRNN:
            free_state_crnn(net.layers[idx]);
            break;
        default:
            break;
        }
        ++idx;
    }
}
|
||||||
|
|
||||||
|
|
||||||
void remember_network_recurrent_state(network net)
|
void remember_network_recurrent_state(network net)
|
||||||
{
|
{
|
||||||
|
@ -104,6 +104,8 @@ void backward_network_gpu(network net, network_state state);
|
|||||||
void update_network_gpu(network net);
|
void update_network_gpu(network net);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
float get_current_seq_subdivisions(network net);
|
||||||
|
int get_sequence_value(network net);
|
||||||
float get_current_rate(network net);
|
float get_current_rate(network net);
|
||||||
int get_current_batch(network net);
|
int get_current_batch(network net);
|
||||||
void free_network(network net);
|
void free_network(network net);
|
||||||
@ -164,6 +166,7 @@ int get_network_background(network net);
|
|||||||
network combine_train_valid_networks(network net_train, network net_map);
|
network combine_train_valid_networks(network net_train, network net_map);
|
||||||
void copy_weights_net(network net_train, network *net_map);
|
void copy_weights_net(network net_train, network *net_map);
|
||||||
void free_network_recurrent_state(network net);
|
void free_network_recurrent_state(network net);
|
||||||
|
void randomize_network_recurrent_state(network net);
|
||||||
void remember_network_recurrent_state(network net);
|
void remember_network_recurrent_state(network net);
|
||||||
void restore_network_recurrent_state(network net);
|
void restore_network_recurrent_state(network net);
|
||||||
|
|
||||||
|
@ -126,7 +126,7 @@ void update_network_gpu(network net)
|
|||||||
{
|
{
|
||||||
cuda_set_device(net.gpu_index);
|
cuda_set_device(net.gpu_index);
|
||||||
int i;
|
int i;
|
||||||
int update_batch = net.batch*net.subdivisions;
|
int update_batch = net.batch*net.subdivisions * get_sequence_value(net);
|
||||||
float rate = get_current_rate(net);
|
float rate = get_current_rate(net);
|
||||||
for(i = 0; i < net.n; ++i){
|
for(i = 0; i < net.n; ++i){
|
||||||
layer l = net.layers[i];
|
layer l = net.layers[i];
|
||||||
@ -200,7 +200,9 @@ float train_network_datum_gpu(network net, float *x, float *y)
|
|||||||
*net.seen += net.batch;
|
*net.seen += net.batch;
|
||||||
forward_backward_network_gpu(net, x, y);
|
forward_backward_network_gpu(net, x, y);
|
||||||
float error = get_network_cost(net);
|
float error = get_network_cost(net);
|
||||||
if (((*net.seen) / net.batch) % net.subdivisions == 0) update_network_gpu(net);
|
//if (((*net.seen) / net.batch) % net.subdivisions == 0) update_network_gpu(net);
|
||||||
|
const int sequence = get_sequence_value(net);
|
||||||
|
if (((*net.seen) / net.batch) % (net.subdivisions*sequence) == 0) update_network_gpu(net);
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
12
src/parser.c
12
src/parser.c
@ -672,7 +672,7 @@ void parse_net_options(list *options, network *net)
|
|||||||
net->time_steps = option_find_int_quiet(options, "time_steps",1);
|
net->time_steps = option_find_int_quiet(options, "time_steps",1);
|
||||||
net->track = option_find_int_quiet(options, "track", 0);
|
net->track = option_find_int_quiet(options, "track", 0);
|
||||||
net->augment_speed = option_find_int_quiet(options, "augment_speed", 2);
|
net->augment_speed = option_find_int_quiet(options, "augment_speed", 2);
|
||||||
net->sequential_subdivisions = option_find_int_quiet(options, "sequential_subdivisions", 0);
|
net->init_sequential_subdivisions = net->sequential_subdivisions = option_find_int_quiet(options, "sequential_subdivisions", subdivs);
|
||||||
net->try_fix_nan = option_find_int_quiet(options, "try_fix_nan", 0);
|
net->try_fix_nan = option_find_int_quiet(options, "try_fix_nan", 0);
|
||||||
net->batch /= subdivs;
|
net->batch /= subdivs;
|
||||||
net->batch *= net->time_steps;
|
net->batch *= net->time_steps;
|
||||||
@ -721,6 +721,7 @@ void parse_net_options(list *options, network *net)
|
|||||||
} else if (net->policy == STEPS){
|
} else if (net->policy == STEPS){
|
||||||
char *l = option_find(options, "steps");
|
char *l = option_find(options, "steps");
|
||||||
char *p = option_find(options, "scales");
|
char *p = option_find(options, "scales");
|
||||||
|
char *s = option_find(options, "seq_scales");
|
||||||
if(!l || !p) error("STEPS policy must have steps and scales in cfg file");
|
if(!l || !p) error("STEPS policy must have steps and scales in cfg file");
|
||||||
|
|
||||||
int len = strlen(l);
|
int len = strlen(l);
|
||||||
@ -731,6 +732,14 @@ void parse_net_options(list *options, network *net)
|
|||||||
}
|
}
|
||||||
int* steps = (int*)calloc(n, sizeof(int));
|
int* steps = (int*)calloc(n, sizeof(int));
|
||||||
float* scales = (float*)calloc(n, sizeof(float));
|
float* scales = (float*)calloc(n, sizeof(float));
|
||||||
|
float* seq_scales = (float*)calloc(n, sizeof(float));
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
if (s) {
|
||||||
|
seq_scales[i] = atof(s);
|
||||||
|
s = strchr(s, ',') + 1;
|
||||||
|
} else
|
||||||
|
seq_scales[i] = 1;
|
||||||
|
}
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
int step = atoi(l);
|
int step = atoi(l);
|
||||||
float scale = atof(p);
|
float scale = atof(p);
|
||||||
@ -741,6 +750,7 @@ void parse_net_options(list *options, network *net)
|
|||||||
}
|
}
|
||||||
net->scales = scales;
|
net->scales = scales;
|
||||||
net->steps = steps;
|
net->steps = steps;
|
||||||
|
net->seq_scales = seq_scales;
|
||||||
net->num_steps = n;
|
net->num_steps = n;
|
||||||
} else if (net->policy == EXP){
|
} else if (net->policy == EXP){
|
||||||
net->gamma = option_find_float(options, "gamma", 1);
|
net->gamma = option_find_float(options, "gamma", 1);
|
||||||
|
Reference in New Issue
Block a user