GRU layer: CPU forward pass and supporting changes (unified diff).

Files touched:
  * cfg/gru.cfg               new network config: three batch-normalized
                              [gru] layers (1024 outputs each), a linear
                              [connected] layer, [softmax] and an sse [cost].
  * src/blas.c, src/blas.h    add weighted_sum_cpu(): c[i] = s[i]*a[i] +
                              (1-s[i])*b[i], where a NULL b is read as zeros.
  * src/convolutional_layer.c comment out the if(l.binary) branch of
                              forward_convolutional_layer().
  * src/detection_layer.c     in forward_detection_layer(), zero each delta at
                              offset locations*l.classes + i*l.n + j whose
                              square is below the smallest of the (at most)
                              100 largest squared deltas.
  * src/gru_layer.c           allocate the CPU state buffers in
                              make_gru_layer() and fill in forward_gru_layer().
  * src/layer.h               add the XNOR layer type and the float* fields
                              used by the GRU code.
  * src/tag.c                 train_tag() takes a `clear` flag (set from the
                              "-clear" argument) that resets *net.seen to 0.

NOTE(review): the line structure was rebuilt from a copy whose newlines had
been collapsed. Leading whitespace on context lines is assumed to be 4-space
indentation; use `git apply --ignore-whitespace` if a hunk does not match.
NOTE(review): the header name on the second context line of the src/blas.c
hunk was missing from that copy; <assert.h> is assumed. Confirm before use.
NOTE(review): the added lines of the src/detection_layer.c hunk no longer
match the post-image blob named on its `index` line (1adda06b), because the
top_k count is now clamped to the number of candidates.

diff --git a/cfg/gru.cfg b/cfg/gru.cfg
new file mode 100644
index 00000000..f9a06999
--- /dev/null
+++ b/cfg/gru.cfg
@@ -0,0 +1,34 @@
+[net]
+subdivisions=1
+inputs=256
+batch = 1
+momentum=0.9
+decay=0.001
+time_steps=1
+learning_rate=0.5
+
+policy=poly
+power=4
+max_batches=2000
+
+[gru]
+batch_normalize=1
+output = 1024
+
+[gru]
+batch_normalize=1
+output = 1024
+
+[gru]
+batch_normalize=1
+output = 1024
+
+[connected]
+output=256
+activation=linear
+
+[softmax]
+
+[cost]
+type=sse
+
diff --git a/src/blas.c b/src/blas.c
index 35a4c40a..00f0c3a3 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -2,6 +2,14 @@
 #include "math.h"
 #include <assert.h>
 
+void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0);
+    }
+}
+
 void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
 {
     int stride = w1/w2;
diff --git a/src/blas.h b/src/blas.h
index 47d930c3..b4cfcf2e 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -31,6 +31,7 @@ void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_del
 
 void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
 void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
+void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
 
 #ifdef GPU
 void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 5575aacf..c377802f 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -413,6 +413,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
     }
     */
 
+/*
     if(l.binary){
         int m = l.n;
         int k = l.size*l.size*l.c;
@@ -434,6 +435,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
         activate_array(l.output, m*n*l.batch, l.activation);
         return;
     }
+    */
 
     int m = l.n;
     int k = l.size*l.size*l.c;
diff --git a/src/detection_layer.c b/src/detection_layer.c
index 90b672b1..1adda06b 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -175,6 +175,44 @@ void forward_detection_layer(const detection_layer l, network_state state)
                         LOGISTIC, l.delta + index + locations*l.classes);
             }
         }
+
+        {
+            /* Zero each delta at offset locations*l.classes + i*l.n + j of a
+             * batch item when its square is below the smallest of the `keep`
+             * largest squared deltas found across the whole batch. */
+            enum { MAX_KEPT = 100 };
+            int total = l.batch*locations*l.n;
+            /* Clamp to the number of candidates: with fewer than MAX_KEPT of
+             * them, slot MAX_KEPT-1 of indexes may not hold a usable index. */
+            int keep = (total < MAX_KEPT) ? total : MAX_KEPT;
+            float *costs = (keep > 0) ? calloc(total, sizeof(float)) : 0;
+            if(costs){   /* skipped when there is nothing to rank or calloc failed */
+                int indexes[MAX_KEPT];
+                float cutoff;
+                for (b = 0; b < l.batch; ++b) {
+                    int index = b*l.inputs;
+                    for (i = 0; i < locations; ++i) {
+                        for (j = 0; j < l.n; ++j) {
+                            int p_index = index + locations*l.classes + i*l.n + j;
+                            costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index];
+                        }
+                    }
+                }
+                top_k(costs, total, keep, indexes);
+                cutoff = costs[indexes[keep-1]];
+                for (b = 0; b < l.batch; ++b) {
+                    int index = b*l.inputs;
+                    for (i = 0; i < locations; ++i) {
+                        for (j = 0; j < l.n; ++j) {
+                            int p_index = index + locations*l.classes + i*l.n + j;
+                            if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0;
+                        }
+                    }
+                }
+                free(costs);
+            }
+        }
+
         printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
     }
 }
diff --git a/src/gru_layer.c b/src/gru_layer.c
index 1c41cbfb..4c720ce3 100644
--- a/src/gru_layer.c
+++ b/src/gru_layer.c
@@ -76,6 +76,14 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
     l.outputs = outputs;
     l.output = calloc(outputs*batch*steps, sizeof(float));
     l.delta = calloc(outputs*batch*steps, sizeof(float));
+    l.state = calloc(outputs*batch, sizeof(float));
+    l.prev_state = calloc(outputs*batch, sizeof(float));
+    l.forgot_state = calloc(outputs*batch, sizeof(float));
+    l.forgot_delta = calloc(outputs*batch, sizeof(float));
+
+    l.r_cpu = calloc(outputs*batch, sizeof(float));
+    l.z_cpu = calloc(outputs*batch, sizeof(float));
+    l.h_cpu = calloc(outputs*batch, sizeof(float));
 
 #ifdef GPU
     l.forgot_state_gpu = cuda_make_array(l.output, batch*outputs);
@@ -101,6 +109,78 @@ void update_gru_layer(layer l, int batch, float learning_rate, float momentum, f
 
 void forward_gru_layer(layer l, network_state state)
 {
+    network_state s = {0};
+    s.train = state.train;
+    int i;
+    layer input_z_layer = *(l.input_z_layer);
+    layer input_r_layer = *(l.input_r_layer);
+    layer input_h_layer = *(l.input_h_layer);
+
+    layer state_z_layer = *(l.state_z_layer);
+    layer state_r_layer = *(l.state_r_layer);
+    layer state_h_layer = *(l.state_h_layer);
+
+    fill_cpu(l.outputs * l.batch * l.steps, 0, input_z_layer.delta, 1);
+    fill_cpu(l.outputs * l.batch * l.steps, 0, input_r_layer.delta, 1);
+    fill_cpu(l.outputs * l.batch * l.steps, 0, input_h_layer.delta, 1);
+
+    fill_cpu(l.outputs * l.batch * l.steps, 0, state_z_layer.delta, 1);
+    fill_cpu(l.outputs * l.batch * l.steps, 0, state_r_layer.delta, 1);
+    fill_cpu(l.outputs * l.batch * l.steps, 0, state_h_layer.delta, 1);
+    if(state.train) {
+        fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1);
+        copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1);
+    }
+
+    for (i = 0; i < l.steps; ++i) {
+        s.input = l.state;
+        forward_connected_layer(state_z_layer, s);
+        forward_connected_layer(state_r_layer, s);
+
+        s.input = state.input;
+        forward_connected_layer(input_z_layer, s);
+        forward_connected_layer(input_r_layer, s);
+        forward_connected_layer(input_h_layer, s);
+
+
+        copy_cpu(l.outputs*l.batch, input_z_layer.output, 1, l.z_cpu, 1);
+        axpy_cpu(l.outputs*l.batch, 1, state_z_layer.output, 1, l.z_cpu, 1);
+
+        copy_cpu(l.outputs*l.batch, input_r_layer.output, 1, l.r_cpu, 1);
+        axpy_cpu(l.outputs*l.batch, 1, state_r_layer.output, 1, l.r_cpu, 1);
+
+        activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC);
+        activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC);
+
+        copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1);
+        mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1);
+
+        s.input = l.forgot_state;
+        forward_connected_layer(state_h_layer, s);
+
+        copy_cpu(l.outputs*l.batch, input_h_layer.output, 1, l.h_cpu, 1);
+        axpy_cpu(l.outputs*l.batch, 1, state_h_layer.output, 1, l.h_cpu, 1);
+
+        #ifdef USET
+        activate_array(l.h_cpu, l.outputs*l.batch, TANH);
+        #else
+        activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC);
+        #endif
+
+        weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output);
+
+        copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1);
+
+        state.input += l.inputs*l.batch;
+        l.output += l.outputs*l.batch;
+        increment_layer(&input_z_layer, 1);
+        increment_layer(&input_r_layer, 1);
+        increment_layer(&input_h_layer, 1);
+
+        increment_layer(&state_z_layer, 1);
+        increment_layer(&state_r_layer, 1);
+        increment_layer(&state_h_layer, 1);
+    }
 }
 
 void backward_gru_layer(layer l, network_state state)
diff --git a/src/layer.h b/src/layer.h
index d53fe387..d2250a6d 100644
--- a/src/layer.h
+++ b/src/layer.h
@@ -28,6 +28,7 @@ typedef enum {
     CRNN,
     BATCHNORM,
     NETWORK,
+    XNOR,
     BLANK
 } LAYER_TYPE;
 
@@ -102,6 +103,9 @@ struct layer{
     char *cfilters;
     float *filter_updates;
     float *state;
+    float *prev_state;
+    float *forgot_state;
+    float *forgot_delta;
     float *state_delta;
 
     float *concat;
@@ -159,6 +163,10 @@ struct layer{
     struct layer *input_h_layer;
     struct layer *state_h_layer;
 
+    float *z_cpu;
+    float *r_cpu;
+    float *h_cpu;
+
     size_t workspace_size;
 
     #ifdef GPU
diff --git a/src/tag.c b/src/tag.c
index a00a161a..f97621c3 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -6,7 +6,7 @@
 #include "opencv2/highgui/highgui_c.h"
 #endif
 
-void train_tag(char *cfgfile, char *weightfile)
+void train_tag(char *cfgfile, char *weightfile, int clear)
 {
     data_seed = time(0);
     srand(time(0));
@@ -18,6 +18,7 @@ void train_tag(char *cfgfile, char *weightfile)
     if(weightfile){
         load_weights(&net, weightfile);
     }
+    if(clear) *net.seen = 0;
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 1024;
     list *plist = get_paths("/home/pjreddie/tag/train.list");
@@ -138,10 +139,11 @@ void run_tag(int argc, char **argv)
         return;
     }
 
+    int clear = find_arg(argc, argv, "-clear");
     char *cfg = argv[3];
     char *weights = (argc > 4) ? argv[4] : 0;
     char *filename = (argc > 5) ? argv[5] : 0;
-    if(0==strcmp(argv[2], "train")) train_tag(cfg, weights);
+    if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear);
     else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename);
 }
 