adding yolo9000

This commit is contained in:
Joseph Redmon 2017-01-04 04:44:00 -08:00
parent 2710d63257
commit d2dece3df7
29 changed files with 19591 additions and 596 deletions

2
.gitignore vendored
View File

@ -3,6 +3,8 @@
*.csv *.csv
*.out *.out
*.png *.png
*.jpg
old/
mnist/ mnist/
data/ data/
caffe/ caffe/

View File

@ -1,7 +1,7 @@
classes= 80 classes= 80
train = /home/pjreddie/data/coco/trainvalno5k.txt train = /home/pjreddie/data/coco/trainvalno5k.txt
valid = coco_testdev #valid = coco_testdev
#valid = data/coco_val_5k.list valid = data/coco_val_5k.list
names = data/coco.names names = data/coco.names
backup = /home/pjreddie/backup/ backup = /home/pjreddie/backup/
eval=coco eval=coco

View File

@ -2,5 +2,5 @@ classes= 20
train = /home/pjreddie/data/voc/train.txt train = /home/pjreddie/data/voc/train.txt
valid = /home/pjreddie/data/voc/2007_test.txt valid = /home/pjreddie/data/voc/2007_test.txt
names = data/voc.names names = data/voc.names
backup = /home/pjreddie/backup/ backup = backup

211
cfg/yolo9000.cfg Normal file
View File

@ -0,0 +1,211 @@
[net]
batch=1
subdivisions=1
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
learning_rate=0.00001
max_batches = 242200
policy=steps
steps=500,200000,240000
scales=10,.1,.1
hue=.1
saturation=.75
exposure=.75
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
filters=28269
size=1
stride=1
pad=1
activation=linear
[region]
anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974
bias_match=1
classes=9418
coords=4
num=3
softmax=1
jitter=.2
rescore=1
object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1
thresh = .6
absolute=1
random=1
tree=data/9k.tree
map = data/coco9k.map

9418
data/9k.labels Normal file

File diff suppressed because it is too large Load Diff

9418
data/9k.names Normal file

File diff suppressed because it is too large Load Diff

80
data/coco9k.map Normal file
View File

@ -0,0 +1,80 @@
5177
3768
3802
3800
4107
4072
4071
3797
4097
2645
5150
2644
3257
2523
6527
6866
6912
7342
7255
7271
7217
6858
7343
7233
3704
4374
3641
5001
3899
2999
2631
5141
2015
1133
1935
1930
5144
5143
2371
3916
3745
3640
4749
4736
4735
3678
58
42
771
81
152
141
786
700
218
791
2518
2521
3637
2458
2505
2519
3499
2837
3503
2597
3430
2080
5103
5111
5102
3013
5096
1102
3218
4010
2266
1127
5122
2360

200
data/inet9k.map Normal file
View File

@ -0,0 +1,200 @@
2687
4107
8407
7254
42
6797
127
2268
2442
3704
260
1970
58
4443
2661
2043
2039
4858
4007
6858
8408
166
2523
3768
4347
6527
2446
5005
3274
3678
4918
709
4072
8428
7223
2251
3802
3848
7271
2677
8267
2849
2518
2738
3746
5105
3430
3503
2249
1841
2032
2358
122
3984
4865
3246
5095
6912
6878
8467
2741
1973
3057
7217
1872
44
2452
3637
2704
6917
2715
6734
2325
6864
6677
2035
1949
338
2664
5122
1844
784
2223
7188
2719
2670
4830
158
4818
7228
1965
7342
786
2095
8281
8258
7406
3915
8382
2437
2837
82
6871
1876
7447
8285
5007
2740
3463
5103
3755
4910
6809
3800
118
3396
3092
2709
81
7105
4036
2366
1846
5177
2684
64
2041
3919
700
3724
1742
39
807
7184
2256
235
2778
2996
2030
3714
7167
2369
6705
6861
5096
2597
2166
2036
3228
3747
2711
8300
2226
7153
7255
2631
7109
8242
7445
3776
3803
3690
2025
2521
2316
7190
8249
3352
2639
2887
100
4219
3344
5008
7224
3351
2434
2074
2034
8304
5004
6868
5102
2645
4071
2716
2717
7420
3499
3763
5084
2676
2046
5107
5097
3944
4097
7132
3956
7343

View File

@ -246,6 +246,34 @@ int nms_comparator(const void *pa, const void *pb)
return 0; return 0;
} }
/*
 * Non-maximum suppression driven by the extra score stored at index `classes`
 * of every probs row (presumably the objectness score -- confirm against the
 * code that fills probs).
 *
 * boxes   : `total` candidate boxes.
 * probs   : `total` rows; each row must hold at least classes+1 floats because
 *           slot `classes` is read and slots 0..classes are written.
 * total   : number of candidates.
 * classes : number of class slots per row (the extra slot follows them).
 * thresh  : IoU above which a later-ranked box is suppressed.
 *
 * Candidates are ranked with nms_comparator using slot `classes` as the key.
 * Every box whose key is non-zero then wipes the whole probs row (all classes
 * plus the extra slot) of each later-ranked box that overlaps it by more
 * than `thresh`.
 */
void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh)
{
    int rank, later, slot;
    sortable_bbox *order = calloc(total, sizeof(sortable_bbox));

    /* Tag every candidate with its original position and the key column. */
    for(rank = 0; rank < total; ++rank){
        order[rank].probs = probs;
        order[rank].class = classes;
        order[rank].index = rank;
    }
    qsort(order, total, sizeof(sortable_bbox), nms_comparator);

    for(rank = 0; rank < total; ++rank){
        int keeper = order[rank].index;
        /* A zero key means this box was never a detection or is already suppressed. */
        if(probs[keeper][classes] == 0) continue;
        box a = boxes[keeper];
        for(later = rank + 1; later < total; ++later){
            int loser = order[later].index;
            box b = boxes[loser];
            if(box_iou(a, b) > thresh){
                /* Clear every class score and the extra slot itself. */
                for(slot = 0; slot <= classes; ++slot){
                    probs[loser][slot] = 0;
                }
            }
        }
    }
    free(order);
}
void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh) void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh)
{ {
int i, j, k; int i, j, k;

View File

@ -15,6 +15,7 @@ float box_rmse(box a, box b);
dbox diou(box a, box b); dbox diou(box a, box b);
void do_nms(box *boxes, float **probs, int total, int classes, float thresh); void do_nms(box *boxes, float **probs, int total, int classes, float thresh);
void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh); void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh);
void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh);
box decode_box(box b, box anchor); box decode_box(box b, box anchor);
box encode_box(box b, box anchor); box encode_box(box b, box anchor);

View File

@ -384,5 +384,5 @@ void run_coco(int argc, char **argv)
else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix); else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, .5);
} }

View File

@ -127,6 +127,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state) void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
{ {
//constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);

View File

@ -1,91 +0,0 @@
#include "mini_blas.h"
/*
 * C += ALPHA * A * B with neither operand transposed.
 *
 * A is indexed as A[i*lda + k] (M x K), B as B[k*ldb + j] (K x N) and the
 * result is accumulated into C[i*ldc + j] (M x N).
 * TA, TB and BETA are not read here; they only keep the signature aligned
 * with the other cpu_gemm_* kernels.
 */
void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
{
    int row, inner, col;
    (void)TA; (void)TB; (void)BETA;
    for(row = 0; row < M; ++row){
        const int a_off = row*lda;
        const int c_off = row*ldc;
        for(inner = 0; inner < K; ++inner){
            /* The scaled A element is constant across the whole column sweep. */
            const float scaled_a = ALPHA*A[a_off + inner];
            const int b_off = inner*ldb;
            for(col = 0; col < N; ++col){
                C[c_off + col] += scaled_a*B[b_off + col];
            }
        }
    }
}
/*
 * C += ALPHA * A * B^T (only B is transposed).
 *
 * A is indexed as A[i*lda + k] (M x K) and B as B[j*ldb + k] (N x K); each
 * output cell receives one dot product added to C[i*ldc + j].
 * TA, TB and BETA are not read here; they only keep the signature aligned
 * with the other cpu_gemm_* kernels.
 */
void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
{
    int row, col, inner;
    (void)TA; (void)TB; (void)BETA;
    for(row = 0; row < M; ++row){
        const int a_off = row*lda;
        for(col = 0; col < N; ++col){
            const int b_off = col*ldb;
            /* Accumulate the dot product locally, then add it to C once. */
            float dot = 0;
            for(inner = 0; inner < K; ++inner){
                dot += ALPHA*A[a_off + inner]*B[inner + b_off];
            }
            C[row*ldc + col] += dot;
        }
    }
}
/*
 * C += ALPHA * A^T * B (only A is transposed).
 *
 * A is indexed as A[k*lda + i] (K x M) and B as B[k*ldb + j] (K x N); the
 * result is accumulated into C[i*ldc + j] (M x N).
 * TA, TB and BETA are not read here; they only keep the signature aligned
 * with the other cpu_gemm_* kernels.
 */
void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
{
    int row, inner, col;
    (void)TA; (void)TB; (void)BETA;
    for(row = 0; row < M; ++row){
        const int c_off = row*ldc;
        for(inner = 0; inner < K; ++inner){
            /* Column `row` of the stored A, walked down its K entries. */
            const float scaled_a = ALPHA*A[inner*lda + row];
            const int b_off = inner*ldb;
            for(col = 0; col < N; ++col){
                C[c_off + col] += scaled_a*B[b_off + col];
            }
        }
    }
}
/*
 * C += ALPHA * A^T * B^T (both operands transposed).
 *
 * A is indexed as A[k*lda + i] (K x M) and B as B[j*ldb + k] (N x K); every
 * product term is added straight into C[i*ldc + j].
 * TA, TB and BETA are not read here; they only keep the signature aligned
 * with the other cpu_gemm_* kernels.
 */
void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
{
    int row, col, inner;
    (void)TA; (void)TB; (void)BETA;
    for(row = 0; row < M; ++row){
        for(col = 0; col < N; ++col){
            const int c_idx = row*ldc + col;
            const int b_off = col*ldb;
            /* Terms are added to C one at a time (no local sum) so the
               floating-point accumulation order stays as it always was. */
            for(inner = 0; inner < K; ++inner){
                C[c_idx] += ALPHA*A[row + inner*lda]*B[inner + b_off];
            }
        }
    }
}
/*
 * General matrix multiply on the CPU: C = BETA*C + ALPHA * op(A) * op(B).
 *
 * TA / TB  : non-zero selects the transposed form of A / B.
 * M, N, K  : C is M x N; K is the shared inner dimension.
 * lda/ldb/ldc : row strides of the stored A, B and C arrays.
 *
 * C is first scaled in place by BETA, then the kernel matching the
 * (TA, TB) combination accumulates the product into it.
 * NOTE: the scaling is a plain multiply even when BETA is 0, so any NaN/Inf
 * already present in C is not cleared.
 */
void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
{
    int row, col;
    for(row = 0; row < M; ++row){
        const int c_off = row*ldc;
        for(col = 0; col < N; ++col){
            C[c_off + col] *= BETA;
        }
    }

    /* Pick the kernel from the two transpose flags. */
    if(TB){
        if(TA) cpu_gemm_tt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc);
        else   cpu_gemm_nt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc);
    } else {
        if(TA) cpu_gemm_tn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc);
        else   cpu_gemm_nn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc);
    }
}

View File

@ -13,7 +13,7 @@
#endif #endif
extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh); extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh);
extern void run_voxel(int argc, char **argv); extern void run_voxel(int argc, char **argv);
extern void run_yolo(int argc, char **argv); extern void run_yolo(int argc, char **argv);
extern void run_detector(int argc, char **argv); extern void run_detector(int argc, char **argv);
@ -129,7 +129,9 @@ void oneoff(char *cfgfile, char *weightfile, char *outfile)
network net = parse_network_cfg(cfgfile); network net = parse_network_cfg(cfgfile);
int oldn = net.layers[net.n - 2].n; int oldn = net.layers[net.n - 2].n;
int c = net.layers[net.n - 2].c; int c = net.layers[net.n - 2].c;
net.layers[net.n - 2].n = 9372; scal_cpu(oldn*c, .1, net.layers[net.n - 2].weights, 1);
scal_cpu(oldn, 0, net.layers[net.n - 2].biases, 1);
net.layers[net.n - 2].n = 9418;
net.layers[net.n - 2].biases += 5; net.layers[net.n - 2].biases += 5;
net.layers[net.n - 2].weights += 5*c; net.layers[net.n - 2].weights += 5*c;
if(weightfile){ if(weightfile){
@ -383,7 +385,7 @@ int main(int argc, char **argv)
} else if (0 == strcmp(argv[1], "detect")){ } else if (0 == strcmp(argv[1], "detect")){
float thresh = find_float_arg(argc, argv, "-thresh", .24); float thresh = find_float_arg(argc, argv, "-thresh", .24);
char *filename = (argc > 4) ? argv[4]: 0; char *filename = (argc > 4) ? argv[4]: 0;
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh); test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5);
} else if (0 == strcmp(argv[1], "cifar")){ } else if (0 == strcmp(argv[1], "cifar")){
run_cifar(argc, argv); run_cifar(argc, argv);
} else if (0 == strcmp(argv[1], "go")){ } else if (0 == strcmp(argv[1], "go")){

View File

@ -267,7 +267,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
h = boxes[i].h; h = boxes[i].h;
id = boxes[i].id; id = boxes[i].id;
if (w < .01 || h < .01) continue; if (w < .005 || h < .005) continue;
int col = (int)(x*num_boxes); int col = (int)(x*num_boxes);
int row = (int)(y*num_boxes); int row = (int)(y*num_boxes);
@ -317,7 +317,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
h = boxes[i].h; h = boxes[i].h;
id = boxes[i].id; id = boxes[i].id;
if ((w < .01 || h < .01)) continue; if ((w < .005 || h < .005)) continue;
truth[i*5+0] = x; truth[i*5+0] = x;
truth[i*5+1] = y; truth[i*5+1] = y;

View File

@ -31,6 +31,7 @@ static image disp = {0};
static CvCapture * cap; static CvCapture * cap;
static float fps = 0; static float fps = 0;
static float demo_thresh = 0; static float demo_thresh = 0;
static float demo_hier_thresh = .5;
static float *predictions[FRAMES]; static float *predictions[FRAMES];
static int demo_index = 0; static int demo_index = 0;
@ -63,7 +64,7 @@ void *detect_in_thread(void *ptr)
if(l.type == DETECTION){ if(l.type == DETECTION){
get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
} else if (l.type == REGION){ } else if (l.type == REGION){
get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0); get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0, demo_hier_thresh);
} else { } else {
error("Last layer must produce detections\n"); error("Last layer must produce detections\n");
} }
@ -91,7 +92,7 @@ double get_wall_time()
return (double)time.tv_sec + (double)time.tv_usec * .000001; return (double)time.tv_sec + (double)time.tv_usec * .000001;
} }
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix) void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh)
{ {
//skip = frame_skip; //skip = frame_skip;
image **alphabet = load_alphabet(); image **alphabet = load_alphabet();
@ -100,6 +101,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
demo_alphabet = alphabet; demo_alphabet = alphabet;
demo_classes = classes; demo_classes = classes;
demo_thresh = thresh; demo_thresh = thresh;
demo_hier_thresh = hier_thresh;
printf("Demo\n"); printf("Demo\n");
net = parse_network_cfg(cfgfile); net = parse_network_cfg(cfgfile);
if(weightfile){ if(weightfile){
@ -127,7 +129,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box));
probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *));
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *)); for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float));
pthread_t fetch_thread; pthread_t fetch_thread;
pthread_t detect_thread; pthread_t detect_thread;
@ -213,7 +215,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
} }
} }
#else #else
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix) void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh)
{ {
fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
} }

View File

@ -2,6 +2,6 @@
#define DEMO #define DEMO
#include "image.h" #include "image.h"
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix); void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh);
#endif #endif

View File

@ -81,7 +81,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
if(l.random && count++%10 == 0){ if(l.random && count++%10 == 0){
printf("Resizing\n"); printf("Resizing\n");
int dim = (rand() % 10 + 10) * 32; int dim = (rand() % 10 + 10) * 32;
if (get_current_batch(net)+100 > net.max_batches) dim = 544; if (get_current_batch(net)+200 > net.max_batches) dim = 608;
//int dim = (rand() % 4 + 16) * 32; //int dim = (rand() % 4 + 16) * 32;
printf("%d\n", dim); printf("%d\n", dim);
args.w = dim; args.w = dim;
@ -231,7 +231,7 @@ void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int
} }
} }
void validate_detector(char *datacfg, char *cfgfile, char *weightfile) void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
{ {
int j; int j;
list *options = read_data_cfg(datacfg); list *options = read_data_cfg(datacfg);
@ -251,7 +251,6 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
srand(time(0)); srand(time(0));
char *base = "comp4_det_test_";
list *plist = get_paths(valid_images); list *plist = get_paths(valid_images);
char **paths = (char **)list_to_array(plist); char **paths = (char **)list_to_array(plist);
@ -265,19 +264,22 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
int coco = 0; int coco = 0;
int imagenet = 0; int imagenet = 0;
if(0==strcmp(type, "coco")){ if(0==strcmp(type, "coco")){
snprintf(buff, 1024, "%s/coco_results.json", prefix); if(!outfile) outfile = "coco_results";
snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
fp = fopen(buff, "w"); fp = fopen(buff, "w");
fprintf(fp, "[\n"); fprintf(fp, "[\n");
coco = 1; coco = 1;
} else if(0==strcmp(type, "imagenet")){ } else if(0==strcmp(type, "imagenet")){
snprintf(buff, 1024, "%s/imagenet-detection.txt", prefix); if(!outfile) outfile = "imagenet-detection";
snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
fp = fopen(buff, "w"); fp = fopen(buff, "w");
imagenet = 1; imagenet = 1;
classes = 200; classes = 200;
} else { } else {
if(!outfile) outfile = "comp4_det_test_";
fps = calloc(classes, sizeof(FILE *)); fps = calloc(classes, sizeof(FILE *));
for(j = 0; j < classes; ++j){ for(j = 0; j < classes; ++j){
snprintf(buff, 1024, "%s/%s%s.txt", prefix, base, names[j]); snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
fps[j] = fopen(buff, "w"); fps[j] = fopen(buff, "w");
} }
} }
@ -333,7 +335,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
network_predict(net, X); network_predict(net, X);
int w = val[t].w; int w = val[t].w;
int h = val[t].h; int h = val[t].h;
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map); get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms); if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
if (coco){ if (coco){
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h); print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@ -397,7 +399,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
image sized = resize_image(orig, net.w, net.h); image sized = resize_image(orig, net.w, net.h);
char *id = basecfg(path); char *id = basecfg(path);
network_predict(net, sized.data); network_predict(net, sized.data);
get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0); get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0, .5);
if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms); if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);
char labelpath[4096]; char labelpath[4096];
@ -436,7 +438,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
} }
} }
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh) void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh)
{ {
list *options = read_data_cfg(datacfg); list *options = read_data_cfg(datacfg);
char *name_list = option_find_str(options, "names", "data/names.list"); char *name_list = option_find_str(options, "names", "data/names.list");
@ -470,14 +472,15 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *));
float *X = sized.data; float *X = sized.data;
time=clock(); time=clock();
network_predict(net, X); network_predict(net, X);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0); get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); if (l.softmax_tree && nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes); draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
save_image(im, "predictions"); save_image(im, "predictions");
show_image(im, "predictions"); show_image(im, "predictions");
@ -498,6 +501,7 @@ void run_detector(int argc, char **argv)
{ {
char *prefix = find_char_arg(argc, argv, "-prefix", 0); char *prefix = find_char_arg(argc, argv, "-prefix", 0);
float thresh = find_float_arg(argc, argv, "-thresh", .24); float thresh = find_float_arg(argc, argv, "-thresh", .24);
float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
int cam_index = find_int_arg(argc, argv, "-c", 0); int cam_index = find_int_arg(argc, argv, "-c", 0);
int frame_skip = find_int_arg(argc, argv, "-s", 0); int frame_skip = find_int_arg(argc, argv, "-s", 0);
if(argc < 4){ if(argc < 4){
@ -505,6 +509,7 @@ void run_detector(int argc, char **argv)
return; return;
} }
char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
char *outfile = find_char_arg(argc, argv, "-out", 0);
int *gpus = 0; int *gpus = 0;
int gpu = 0; int gpu = 0;
int ngpus = 0; int ngpus = 0;
@ -533,15 +538,15 @@ void run_detector(int argc, char **argv)
char *cfg = argv[4]; char *cfg = argv[4];
char *weights = (argc > 5) ? argv[5] : 0; char *weights = (argc > 5) ? argv[5] : 0;
char *filename = (argc > 6) ? argv[6]: 0; char *filename = (argc > 6) ? argv[6]: 0;
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh); if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh);
else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) { else if(0==strcmp(argv[2], "demo")) {
list *options = read_data_cfg(datacfg); list *options = read_data_cfg(datacfg);
int classes = option_find_int(options, "classes", 20); int classes = option_find_int(options, "classes", 20);
char *name_list = option_find_str(options, "names", "data/names.list"); char *name_list = option_find_str(options, "names", "data/names.list");
char **names = get_labels(name_list); char **names = get_labels(name_list);
demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix); demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh);
} }
} }

View File

@ -11,34 +11,88 @@ void free_layer(layer l)
#endif #endif
return; return;
} }
if(l.indexes) free(l.indexes); if(l.cweights) free(l.cweights);
if(l.rand) free(l.rand); if(l.indexes) free(l.indexes);
if(l.cost) free(l.cost); if(l.input_layers) free(l.input_layers);
if(l.biases) free(l.biases); if(l.input_sizes) free(l.input_sizes);
if(l.bias_updates) free(l.bias_updates); if(l.map) free(l.map);
if(l.weights) free(l.weights); if(l.rand) free(l.rand);
if(l.weight_updates) free(l.weight_updates); if(l.cost) free(l.cost);
if(l.col_image) free(l.col_image); if(l.state) free(l.state);
if(l.input_layers) free(l.input_layers); if(l.prev_state) free(l.prev_state);
if(l.input_sizes) free(l.input_sizes); if(l.forgot_state) free(l.forgot_state);
if(l.delta) free(l.delta); if(l.forgot_delta) free(l.forgot_delta);
if(l.output) free(l.output); if(l.state_delta) free(l.state_delta);
if(l.squared) free(l.squared); if(l.concat) free(l.concat);
if(l.norms) free(l.norms); if(l.concat_delta) free(l.concat_delta);
if(l.binary_weights) free(l.binary_weights);
if(l.biases) free(l.biases);
if(l.bias_updates) free(l.bias_updates);
if(l.scales) free(l.scales);
if(l.scale_updates) free(l.scale_updates);
if(l.weights) free(l.weights);
if(l.weight_updates) free(l.weight_updates);
if(l.col_image) free(l.col_image);
if(l.delta) free(l.delta);
if(l.output) free(l.output);
if(l.squared) free(l.squared);
if(l.norms) free(l.norms);
if(l.spatial_mean) free(l.spatial_mean);
if(l.mean) free(l.mean);
if(l.variance) free(l.variance);
if(l.mean_delta) free(l.mean_delta);
if(l.variance_delta) free(l.variance_delta);
if(l.rolling_mean) free(l.rolling_mean);
if(l.rolling_variance) free(l.rolling_variance);
if(l.x) free(l.x);
if(l.x_norm) free(l.x_norm);
if(l.m) free(l.m);
if(l.v) free(l.v);
if(l.z_cpu) free(l.z_cpu);
if(l.r_cpu) free(l.r_cpu);
if(l.h_cpu) free(l.h_cpu);
if(l.binary_input) free(l.binary_input);
#ifdef GPU #ifdef GPU
if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu);
if(l.weights_gpu) cuda_free(l.weights_gpu);
if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); if(l.z_gpu) cuda_free(l.z_gpu);
if(l.col_image_gpu) cuda_free(l.col_image_gpu); if(l.r_gpu) cuda_free(l.r_gpu);
if(l.weights_gpu) cuda_free(l.weights_gpu); if(l.h_gpu) cuda_free(l.h_gpu);
if(l.biases_gpu) cuda_free(l.biases_gpu); if(l.m_gpu) cuda_free(l.m_gpu);
if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); if(l.v_gpu) cuda_free(l.v_gpu);
if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); if(l.prev_state_gpu) cuda_free(l.prev_state_gpu);
if(l.output_gpu) cuda_free(l.output_gpu); if(l.forgot_state_gpu) cuda_free(l.forgot_state_gpu);
if(l.delta_gpu) cuda_free(l.delta_gpu); if(l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu);
if(l.rand_gpu) cuda_free(l.rand_gpu); if(l.state_gpu) cuda_free(l.state_gpu);
if(l.squared_gpu) cuda_free(l.squared_gpu); if(l.state_delta_gpu) cuda_free(l.state_delta_gpu);
if(l.norms_gpu) cuda_free(l.norms_gpu); if(l.gate_gpu) cuda_free(l.gate_gpu);
if(l.gate_delta_gpu) cuda_free(l.gate_delta_gpu);
if(l.save_gpu) cuda_free(l.save_gpu);
if(l.save_delta_gpu) cuda_free(l.save_delta_gpu);
if(l.concat_gpu) cuda_free(l.concat_gpu);
if(l.concat_delta_gpu) cuda_free(l.concat_delta_gpu);
if(l.binary_input_gpu) cuda_free(l.binary_input_gpu);
if(l.binary_weights_gpu) cuda_free(l.binary_weights_gpu);
if(l.mean_gpu) cuda_free(l.mean_gpu);
if(l.variance_gpu) cuda_free(l.variance_gpu);
if(l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu);
if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu);
if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu);
if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu);
if(l.col_image_gpu) cuda_free(l.col_image_gpu);
if(l.x_gpu) cuda_free(l.x_gpu);
if(l.x_norm_gpu) cuda_free(l.x_norm_gpu);
if(l.weights_gpu) cuda_free(l.weights_gpu);
if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu);
if(l.biases_gpu) cuda_free(l.biases_gpu);
if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu);
if(l.scales_gpu) cuda_free(l.scales_gpu);
if(l.scale_updates_gpu) cuda_free(l.scale_updates_gpu);
if(l.output_gpu) cuda_free(l.output_gpu);
if(l.delta_gpu) cuda_free(l.delta_gpu);
if(l.rand_gpu) cuda_free(l.rand_gpu);
if(l.squared_gpu) cuda_free(l.squared_gpu);
if(l.norms_gpu) cuda_free(l.norms_gpu);
#endif #endif
} }

View File

@ -99,14 +99,7 @@ struct layer{
float B1; float B1;
float B2; float B2;
float eps; float eps;
float *m_gpu;
float *v_gpu;
int t; int t;
float *m;
float *v;
tree *softmax_tree;
int *map;
float alpha; float alpha;
float beta; float beta;
@ -129,33 +122,34 @@ struct layer{
float probability; float probability;
float scale; float scale;
int *indexes; char * cweights;
float *rand; int * indexes;
float *cost;
char *cweights;
float *state;
float *prev_state;
float *forgot_state;
float *forgot_delta;
float *state_delta;
float *concat;
float *concat_delta;
float *binary_weights;
float *biases;
float *bias_updates;
float *scales;
float *scale_updates;
float *weights;
float *weight_updates;
float *col_image;
int * input_layers; int * input_layers;
int * input_sizes; int * input_sizes;
int * map;
float * rand;
float * cost;
float * state;
float * prev_state;
float * forgot_state;
float * forgot_delta;
float * state_delta;
float * concat;
float * concat_delta;
float * binary_weights;
float * biases;
float * bias_updates;
float * scales;
float * scale_updates;
float * weights;
float * weight_updates;
float * col_image;
float * delta; float * delta;
float * output; float * output;
float * squared; float * squared;
@ -174,6 +168,15 @@ struct layer{
float * x; float * x;
float * x_norm; float * x_norm;
float * m;
float * v;
float * z_cpu;
float * r_cpu;
float * h_cpu;
float * binary_input;
struct layer *input_layer; struct layer *input_layer;
struct layer *self_layer; struct layer *self_layer;
struct layer *output_layer; struct layer *output_layer;
@ -194,20 +197,20 @@ struct layer{
struct layer *input_h_layer; struct layer *input_h_layer;
struct layer *state_h_layer; struct layer *state_h_layer;
float *z_cpu; tree *softmax_tree;
float *r_cpu;
float *h_cpu;
float *binary_input;
size_t workspace_size; size_t workspace_size;
#ifdef GPU #ifdef GPU
int *indexes_gpu;
float *z_gpu; float *z_gpu;
float *r_gpu; float *r_gpu;
float *h_gpu; float *h_gpu;
int *indexes_gpu; float *m_gpu;
float *v_gpu;
float * prev_state_gpu; float * prev_state_gpu;
float * forgot_state_gpu; float * forgot_state_gpu;
float * forgot_delta_gpu; float * forgot_delta_gpu;

View File

@ -826,7 +826,7 @@ void save_weights_upto(network net, char *filename, int cutoff)
} }
#endif #endif
fprintf(stderr, "Saving weights to %s\n", filename); fprintf(stderr, "Saving weights to %s\n", filename);
FILE *fp = fopen(filename, "w"); FILE *fp = fopen(filename, "wb");
if(!fp) file_error(filename); if(!fp) file_error(filename);
int major = 0; int major = 0;

View File

@ -9,11 +9,9 @@
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#define DOABS 1 layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
{ {
region_layer l = {0}; layer l = {0};
l.type = REGION; l.type = REGION;
l.n = n; l.n = n;
@ -75,12 +73,8 @@ box get_region_box(float *x, float *biases, int n, int index, int i, int j, int
box b; box b;
b.x = (i + logistic_activate(x[index + 0])) / w; b.x = (i + logistic_activate(x[index + 0])) / w;
b.y = (j + logistic_activate(x[index + 1])) / h; b.y = (j + logistic_activate(x[index + 1])) / h;
b.w = exp(x[index + 2]) * biases[2*n]; b.w = exp(x[index + 2]) * biases[2*n] / w;
b.h = exp(x[index + 3]) * biases[2*n+1]; b.h = exp(x[index + 3]) * biases[2*n+1] / h;
if(DOABS){
b.w = exp(x[index + 2]) * biases[2*n] / w;
b.h = exp(x[index + 3]) * biases[2*n+1] / h;
}
return b; return b;
} }
@ -91,12 +85,8 @@ float delta_region_box(box truth, float *x, float *biases, int n, int index, int
float tx = (truth.x*w - i); float tx = (truth.x*w - i);
float ty = (truth.y*h - j); float ty = (truth.y*h - j);
float tw = log(truth.w / biases[2*n]); float tw = log(truth.w*w / biases[2*n]);
float th = log(truth.h / biases[2*n + 1]); float th = log(truth.h*h / biases[2*n + 1]);
if(DOABS){
tw = log(truth.w*w / biases[2*n]);
th = log(truth.h*h / biases[2*n + 1]);
}
delta[index + 0] = scale * (tx - logistic_activate(x[index + 0])) * logistic_gradient(logistic_activate(x[index + 0])); delta[index + 0] = scale * (tx - logistic_activate(x[index + 0])) * logistic_gradient(logistic_activate(x[index + 0]));
delta[index + 1] = scale * (ty - logistic_activate(x[index + 1])) * logistic_gradient(logistic_activate(x[index + 1])); delta[index + 1] = scale * (ty - logistic_activate(x[index + 1])) * logistic_gradient(logistic_activate(x[index + 1]));
@ -141,14 +131,14 @@ float tisnan(float x)
} }
void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output); void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output);
void forward_region_layer(const region_layer l, network_state state) void forward_region_layer(const layer l, network_state state)
{ {
int i,j,b,t,n; int i,j,b,t,n;
int size = l.coords + l.classes + 1; int size = l.coords + l.classes + 1;
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
#ifndef GPU #ifndef GPU
flatten(l.output, l.w*l.h, size*l.n, l.batch, 1); flatten(l.output, l.w*l.h, size*l.n, l.batch, 1);
#endif #endif
for (b = 0; b < l.batch; ++b){ for (b = 0; b < l.batch; ++b){
for(i = 0; i < l.h*l.w*l.n; ++i){ for(i = 0; i < l.h*l.w*l.n; ++i){
int index = size*i + b*l.outputs; int index = size*i + b*l.outputs;
@ -197,6 +187,7 @@ void forward_region_layer(const region_layer l, network_state state)
for(n = 0; n < l.n*l.w*l.h; ++n){ for(n = 0; n < l.n*l.w*l.h; ++n){
int index = size*n + b*l.outputs + 5; int index = size*n + b*l.outputs + 5;
float scale = l.output[index-1]; float scale = l.output[index-1];
l.delta[index - 1] = l.noobject_scale * ((0 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class); float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class);
if(p > maxp){ if(p > maxp){
maxp = p; maxp = p;
@ -205,6 +196,8 @@ void forward_region_layer(const region_layer l, network_state state)
} }
int index = size*maxi + b*l.outputs + 5; int index = size*maxi + b*l.outputs + 5;
delta_region_class(l.output, l.delta, index, class, l.classes, l.softmax_tree, l.class_scale, &avg_cat); delta_region_class(l.output, l.delta, index, class, l.classes, l.softmax_tree, l.class_scale, &avg_cat);
if(l.output[index - 1] < .3) l.delta[index - 1] = l.object_scale * ((.3 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
else l.delta[index - 1] = 0;
++class_count; ++class_count;
onlyclass = 1; onlyclass = 1;
break; break;
@ -218,39 +211,26 @@ void forward_region_layer(const region_layer l, network_state state)
int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs; int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h); box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
float best_iou = 0; float best_iou = 0;
int best_class = -1;
for(t = 0; t < 30; ++t){ for(t = 0; t < 30; ++t){
box truth = float_to_box(state.truth + t*5 + b*l.truths); box truth = float_to_box(state.truth + t*5 + b*l.truths);
if(!truth.x) break; if(!truth.x) break;
float iou = box_iou(pred, truth); float iou = box_iou(pred, truth);
if (iou > best_iou) { if (iou > best_iou) {
best_class = state.truth[t*5 + b*l.truths + 4];
best_iou = iou; best_iou = iou;
} }
} }
avg_anyobj += l.output[index + 4]; avg_anyobj += l.output[index + 4];
l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4])); l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
if(l.classfix == -1) l.delta[index + 4] = l.noobject_scale * ((best_iou - l.output[index + 4]) * logistic_gradient(l.output[index + 4])); if (best_iou > l.thresh) {
else{ l.delta[index + 4] = 0;
if (best_iou > l.thresh) {
l.delta[index + 4] = 0;
if(l.classfix > 0){
delta_region_class(l.output, l.delta, index + 5, best_class, l.classes, l.softmax_tree, l.class_scale*(l.classfix == 2 ? l.output[index + 4] : 1), &avg_cat);
++class_count;
}
}
} }
if(*(state.net.seen) < 12800){ if(*(state.net.seen) < 12800){
box truth = {0}; box truth = {0};
truth.x = (i + .5)/l.w; truth.x = (i + .5)/l.w;
truth.y = (j + .5)/l.h; truth.y = (j + .5)/l.h;
truth.w = l.biases[2*n]; truth.w = l.biases[2*n]/l.w;
truth.h = l.biases[2*n+1]; truth.h = l.biases[2*n+1]/l.h;
if(DOABS){
truth.w = l.biases[2*n]/l.w;
truth.h = l.biases[2*n+1]/l.h;
}
delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01); delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01);
} }
} }
@ -274,12 +254,8 @@ void forward_region_layer(const region_layer l, network_state state)
int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs; int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h); box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
if(l.bias_match){ if(l.bias_match){
pred.w = l.biases[2*n]; pred.w = l.biases[2*n]/l.w;
pred.h = l.biases[2*n+1]; pred.h = l.biases[2*n+1]/l.h;
if(DOABS){
pred.w = l.biases[2*n]/l.w;
pred.h = l.biases[2*n+1]/l.h;
}
} }
//printf("pred: (%f, %f) %f x %f\n", pred.x, pred.y, pred.w, pred.h); //printf("pred: (%f, %f) %f x %f\n", pred.x, pred.y, pred.w, pred.h);
pred.x = 0; pred.x = 0;
@ -313,19 +289,19 @@ void forward_region_layer(const region_layer l, network_state state)
} }
} }
//printf("\n"); //printf("\n");
#ifndef GPU #ifndef GPU
flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0); flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0);
#endif #endif
*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
} }
void backward_region_layer(const region_layer l, network_state state) void backward_region_layer(const layer l, network_state state)
{ {
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
} }
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map) void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh)
{ {
int i,j,n; int i,j,n;
float *predictions = l.output; float *predictions = l.output;
@ -336,7 +312,6 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
int index = i*l.n + n; int index = i*l.n + n;
int p_index = index * (l.classes + 5) + 4; int p_index = index * (l.classes + 5) + 4;
float scale = predictions[p_index]; float scale = predictions[p_index];
if(l.classfix == -1 && scale < .5) scale = 0;
int box_index = index * (l.classes + 5); int box_index = index * (l.classes + 5);
boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h); boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h);
boxes[index].x *= w; boxes[index].x *= w;
@ -348,22 +323,15 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
if(l.softmax_tree){ if(l.softmax_tree){
hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0); hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);
int found = 0;
if(map){ if(map){
for(j = 0; j < 200; ++j){ for(j = 0; j < 200; ++j){
float prob = scale*predictions[class_index+map[j]]; float prob = scale*predictions[class_index+map[j]];
probs[index][j] = (prob > thresh) ? prob : 0; probs[index][j] = (prob > thresh) ? prob : 0;
} }
} else { } else {
for(j = l.classes - 1; j >= 0; --j){ int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh);
if(!found && predictions[class_index + j] > .5){ probs[index][j] = (scale > thresh) ? scale : 0;
found = 1; probs[index][l.classes] = scale;
} else {
predictions[class_index + j] = 0;
}
float prob = predictions[class_index+j];
probs[index][j] = (scale > thresh) ? prob : 0;
}
} }
} else { } else {
for(j = 0; j < l.classes; ++j){ for(j = 0; j < l.classes; ++j){
@ -380,7 +348,7 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
#ifdef GPU #ifdef GPU
void forward_region_layer_gpu(const region_layer l, network_state state) void forward_region_layer_gpu(const layer l, network_state state)
{ {
/* /*
if(!state.train){ if(!state.train){
@ -421,7 +389,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state)
if(cpu_state.truth) free(cpu_state.truth); if(cpu_state.truth) free(cpu_state.truth);
} }
void backward_region_layer_gpu(region_layer l, network_state state) void backward_region_layer_gpu(layer l, network_state state)
{ {
flatten_ongpu(l.delta_gpu, l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch, 0, state.delta); flatten_ongpu(l.delta_gpu, l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch, 0, state.delta);
} }

View File

@ -4,17 +4,15 @@
#include "layer.h" #include "layer.h"
#include "network.h" #include "network.h"
typedef layer region_layer; layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
void forward_region_layer(const layer l, network_state state);
region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords); void backward_region_layer(const layer l, network_state state);
void forward_region_layer(const region_layer l, network_state state); void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh);
void backward_region_layer(const region_layer l, network_state state);
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map);
void resize_region_layer(layer *l, int w, int h); void resize_region_layer(layer *l, int w, int h);
#ifdef GPU #ifdef GPU
void forward_region_layer_gpu(const region_layer l, network_state state); void forward_region_layer_gpu(const layer l, network_state state);
void backward_region_layer_gpu(region_layer l, network_state state); void backward_region_layer_gpu(layer l, network_state state);
#endif #endif
#endif #endif

View File

@ -1,205 +0,0 @@
#include <stdio.h> /* needed for sockaddr_in */
#include <string.h> /* needed for sockaddr_in */
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h> /* needed for sockaddr_in */
#include <netdb.h>
#include <pthread.h>
#include <time.h>
#include "mini_blas.h"
#include "utils.h"
#include "parser.h"
#include "server.h"
#include "connected_layer.h"
#include "convolutional_layer.h"
/* TCP port used by both ends: the server binds to it and the client connects to it. */
#define SERVER_PORT 9423
/* Stringification helper; not referenced in the visible part of this file. */
#define STR(x) #x
/*
 * Create an IPv4 stream (SOCK_STREAM, i.e. TCP) socket.
 *
 * server: when equal to 1 the socket is additionally bound to SERVER_PORT on
 *         INADDR_ANY; any other value leaves the socket unbound.
 *
 * Failures of socket() or bind() are reported through error().
 * Returns the socket descriptor.
 */
int socket_setup(int server)
{
    struct sockaddr_in local_addr;   /* address we bind to in server mode */
    int sock = socket(AF_INET, SOCK_STREAM, 0);

    if (sock < 0) {
        error("cannot create socket");
    }

    /* Client mode: nothing to bind, hand the raw socket back. */
    if (server != 1) {
        return sock;
    }

    /* Server mode: accept connections on any local address, fixed port. */
    memset(&local_addr, 0, sizeof(local_addr));
    local_addr.sin_family = AF_INET;
    local_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    local_addr.sin_port = htons(SERVER_PORT);
    if (bind(sock, (struct sockaddr *)&local_addr, sizeof(local_addr)) < 0) {
        error("bind failed");
    }
    return sock;
}
/* Arguments handed to one handle_connection worker; filled in by server_update. */
typedef struct{
int fd; /* socket returned by accept() for this client */
int counter; /* running connection number; handle_connection checkpoints when it is a multiple of 100 */
network net; /* by-value copy of the server's network struct */
} connection_info;
/*
 * Read n floats from fd and add them element-wise into a (a[i] += incoming[i]).
 *
 * fd: descriptor to read from (passed straight to read_all).
 * a:  destination accumulator holding at least n floats.
 * n:  number of floats expected on the wire.
 *
 * A temporary buffer is used so that `a` is only touched once the data has
 * been handed back by read_all.
 */
void read_and_add_into(int fd, float *a, int n)
{
    float *buff = calloc(n, sizeof(float));
    if (!buff) {
        /* Previously a failed allocation was passed on and dereferenced. */
        error("read_and_add_into: out of memory");
        return;
    }
    read_all(fd, (char*) buff, n*sizeof(float));
    axpy_cpu(n, 1, buff, 1, a, 1);   /* a += 1 * buff */
    free(buff);
}
/*
 * Worker body for a single client connection (started from server_update).
 *
 * pointer: heap-allocated connection_info; it is copied and then freed here,
 *          so the caller must not touch it after starting the worker.
 *
 * Protocol, as implemented below:
 *   1. For every CONVOLUTIONAL / CONNECTED layer, in network order, read the
 *      client's bias updates followed by its filter/weight updates and add
 *      them into the server-side update buffers.
 *   2. For every such layer, call the layer's update routine and then write
 *      the layer's biases followed by its filters/weights back to the client.
 *   3. Close the socket.
 *
 * NOTE(review): this function returns void but is passed to pthread_create
 * through a cast; a thread start routine is expected to return void * -
 * confirm this is acceptable on the target platforms.
 * NOTE(review): no locking is visible here although `info.net` is a shallow
 * copy whose layer pointers are shared with other workers - confirm.
 */
void handle_connection(void *pointer)
{
connection_info info = *(connection_info *) pointer;
free(pointer);
//printf("New Connection\n");
/* Every 100th connection, checkpoint the network to a numbered file. */
if(info.counter%100==0){
char buff[256];
sprintf(buff, "unikitty/net_%d.part", info.counter);
save_network(info.net, buff);
}
int fd = info.fd;
network net = info.net;
int i;
/* Phase 1: receive and accumulate the client's updates, layer by layer. */
for(i = 0; i < net.n; ++i){
if(net.types[i] == CONVOLUTIONAL){
convolutional_layer layer = *(convolutional_layer *) net.layers[i];
read_and_add_into(fd, layer.bias_updates, layer.n);
/* filter count * input channels * kernel height * kernel width */
int num = layer.n*layer.c*layer.size*layer.size;
read_and_add_into(fd, layer.filter_updates, num);
}
if(net.types[i] == CONNECTED){
connected_layer layer = *(connected_layer *) net.layers[i];
read_and_add_into(fd, layer.bias_updates, layer.outputs);
read_and_add_into(fd, layer.weight_updates, layer.inputs*layer.outputs);
}
}
/* Phase 2: update each layer, then send its current parameters back. */
for(i = 0; i < net.n; ++i){
if(net.types[i] == CONVOLUTIONAL){
convolutional_layer layer = *(convolutional_layer *) net.layers[i];
update_convolutional_layer(layer);
write_all(fd, (char*) layer.biases, layer.n*sizeof(float));
int num = layer.n*layer.c*layer.size*layer.size;
write_all(fd, (char*) layer.filters, num*sizeof(float));
}
if(net.types[i] == CONNECTED){
connected_layer layer = *(connected_layer *) net.layers[i];
update_connected_layer(layer);
write_all(fd, (char *)layer.biases, layer.outputs*sizeof(float));
write_all(fd, (char *)layer.weights, layer.outputs*layer.inputs*sizeof(float));
}
}
//printf("Received updates\n");
close(fd);
}
/*
 * Run the parameter server: bind/listen on SERVER_PORT and start one
 * handle_connection worker thread per accepted client.
 *
 * net: network whose layers receive the clients' updates; a by-value copy of
 *      the struct is handed to every worker.
 *
 * The accept loop never terminates, so this function does not return.
 */
void server_update(network net)
{
    int fd = socket_setup(1);
    int counter = 18000;   /* starting connection number, kept from the original */

    if (listen(fd, 64) < 0) {
        error("listen failed");
    }

    while(1){
        struct sockaddr_in client;               /* remote address */
        /* accept() treats the length as a value-result argument, so it must
         * be reset to the buffer size before every call. */
        socklen_t client_size = sizeof(client);
        int connection = accept(fd, (struct sockaddr *) &client, &client_size);
        if (connection < 0) {
            /* Do not spawn a worker on an invalid descriptor. */
            perror("accept");
            continue;
        }

        connection_info *info = calloc(1, sizeof(connection_info));
        if (!info) {
            close(connection);
            error("server_update: out of memory");
            continue;
        }
        info->net = net;
        info->counter = counter;
        info->fd = connection;

        pthread_t worker;
        /* Cast kept from the original: handle_connection is declared as
         * returning void. On success the worker owns and frees `info`. */
        if (pthread_create(&worker, NULL, (void *) &handle_connection, info)) {
            fprintf(stderr, "could not start worker thread\n");
            close(connection);
            free(info);
            continue;
        }
        /* Workers are never joined; detach so their resources are reclaimed
         * when they finish. */
        pthread_detach(worker);

        ++counter;
        printf("%d\n", counter);
        //if(counter == 1024) break;
    }
    close(fd);   /* not reached while the loop above has no exit */
}
/*
 * Exchange parameters with the server at `address`.
 *
 * net:     local network; its CONVOLUTIONAL / CONNECTED layers are synced.
 * address: host name or dotted address of the server.
 *
 * Steps, in layer order:
 *   1. Send each layer's bias updates then filter/weight updates, and zero
 *      the local update buffers.
 *   2. Receive each layer's biases then filters/weights from the server
 *      (pushing them to the GPU when built with GPU).
 *
 * If the host cannot be resolved, a message is printed, the socket is closed
 * and the function returns without exchanging anything.
 */
void client_update(network net, char *address)
{
    int fd = socket_setup(0);

    struct hostent *hp;          /* host information */
    struct sockaddr_in server;   /* server address */

    /* fill in the server's address and data */
    bzero((char*)&server, sizeof(server));
    server.sin_family = AF_INET;
    server.sin_port = htons(SERVER_PORT);

    /* look up the address of the server given its name */
    hp = gethostbyname(address);
    if (!hp) {
        /* gethostbyname does not set errno, so perror would be misleading;
         * report the host that was actually requested and bail out instead
         * of dereferencing the NULL result below. */
        fprintf(stderr, "no such host: could not obtain address of %s\n", address);
        close(fd);
        return;
    }

    /* put the host's address into the server address structure */
    memcpy((void *)&server.sin_addr, hp->h_addr_list[0], hp->h_length);
    if (connect(fd, (struct sockaddr *) &server, sizeof(server)) < 0) {
        error("error connecting");
    }

    /* send a message to the server */
    int i;
    //printf("Sending\n");
    for(i = 0; i < net.n; ++i){
        if(net.types[i] == CONVOLUTIONAL){
            convolutional_layer layer = *(convolutional_layer *) net.layers[i];
            write_all(fd, (char*) layer.bias_updates, layer.n*sizeof(float));
            int num = layer.n*layer.c*layer.size*layer.size;
            write_all(fd, (char*) layer.filter_updates, num*sizeof(float));
            /* updates have been shipped; start accumulating from zero again */
            memset(layer.bias_updates, 0, layer.n*sizeof(float));
            memset(layer.filter_updates, 0, num*sizeof(float));
        }
        if(net.types[i] == CONNECTED){
            connected_layer layer = *(connected_layer *) net.layers[i];
            write_all(fd, (char *)layer.bias_updates, layer.outputs*sizeof(float));
            write_all(fd, (char *)layer.weight_updates, layer.outputs*layer.inputs*sizeof(float));
            memset(layer.bias_updates, 0, layer.outputs*sizeof(float));
            memset(layer.weight_updates, 0, layer.inputs*layer.outputs*sizeof(float));
        }
    }
    //printf("Sent\n");

    /* receive the refreshed parameters in the same layer order */
    for(i = 0; i < net.n; ++i){
        if(net.types[i] == CONVOLUTIONAL){
            convolutional_layer layer = *(convolutional_layer *) net.layers[i];
            read_all(fd, (char*) layer.biases, layer.n*sizeof(float));
            int num = layer.n*layer.c*layer.size*layer.size;
            read_all(fd, (char*) layer.filters, num*sizeof(float));
#ifdef GPU
            push_convolutional_layer(layer);
#endif
        }
        if(net.types[i] == CONNECTED){
            connected_layer layer = *(connected_layer *) net.layers[i];
            read_all(fd, (char *)layer.biases, layer.outputs*sizeof(float));
            read_all(fd, (char *)layer.weights, layer.outputs*layer.inputs*sizeof(float));
#ifdef GPU
            push_connected_layer(layer);
#endif
        }
    }
    //printf("Updated\n");
    close(fd);
}

View File

@ -1,4 +0,0 @@
#include "network.h"
/* Client side: connect to the server named by `address` and exchange layer updates/parameters for `net`. */
void client_update(network net, char *address);
/* Server side: accept client connections and service each one for `net`. */
void server_update(network net);

View File

@ -50,11 +50,39 @@ void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leave
} }
} }
/*
 * Walk the tree from group 0 downwards, at each level choosing the entry with
 * the largest value in `predictions`, for as long as the running product of
 * the chosen values stays above `thresh`.
 *
 * predictions: per-node values indexed like the tree's nodes.
 * hier:        tree providing group_offset/group_size/child/parent arrays.
 * thresh:      lower bound (exclusive) on the accumulated product.
 *
 * Returns the chosen node index once a node with no child group
 * (child < 0) is reached, or, when the product would not exceed `thresh`,
 * the parent recorded for the first node of the current group.
 */
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh)
{
    float path_prob = 1;
    int group = 0;
    for(;;){
        const int first = hier->group_offset[group];
        const int count = hier->group_size[group];
        float best_val = 0;
        int best = 0;
        int k;
        /* arg-max over the nodes of the current group */
        for(k = first; k < first + count; ++k){
            if(predictions[k] > best_val){
                best_val = predictions[k];
                best = k;
            }
        }
        /* Not confident enough to descend: report the level above. */
        if(!(path_prob*best_val > thresh)){
            return hier->parent[first];
        }
        path_prob = path_prob*best_val;
        group = hier->child[best];
        if(group < 0) return best;   /* no child group: stop here */
    }
}
tree *read_tree(char *filename) tree *read_tree(char *filename)
{ {
tree t = {0}; tree t = {0};
FILE *fp = fopen(filename, "r"); FILE *fp = fopen(filename, "r");
char *line; char *line;
int last_parent = -1; int last_parent = -1;
int group_size = 0; int group_size = 0;
@ -67,6 +95,9 @@ tree *read_tree(char *filename)
t.parent = realloc(t.parent, (n+1)*sizeof(int)); t.parent = realloc(t.parent, (n+1)*sizeof(int));
t.parent[n] = parent; t.parent[n] = parent;
t.child = realloc(t.child, (n+1)*sizeof(int));
t.child[n] = -1;
t.name = realloc(t.name, (n+1)*sizeof(char *)); t.name = realloc(t.name, (n+1)*sizeof(char *));
t.name[n] = id; t.name[n] = id;
if(parent != last_parent){ if(parent != last_parent){
@ -80,6 +111,9 @@ tree *read_tree(char *filename)
} }
t.group = realloc(t.group, (n+1)*sizeof(int)); t.group = realloc(t.group, (n+1)*sizeof(int));
t.group[n] = groups; t.group[n] = groups;
if (parent >= 0) {
t.child[parent] = groups;
}
++n; ++n;
++group_size; ++group_size;
} }

View File

@ -5,6 +5,7 @@ typedef struct{
int *leaf; int *leaf;
int n; int n;
int *parent; int *parent;
int *child;
int *group; int *group;
char **name; char **name;
@ -16,6 +17,7 @@ typedef struct{
tree *read_tree(char *filename); tree *read_tree(char *filename);
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves); void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves);
void change_leaves(tree *t, char *leaf_list); void change_leaves(tree *t, char *leaf_list);
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh);
float get_hierarchy_probability(float *x, tree *hier, int c); float get_hierarchy_probability(float *x, tree *hier, int c);
#endif #endif

View File

@ -351,5 +351,5 @@ void run_yolo(int argc, char **argv)
else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix); else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, .5);
} }

View File

@ -1,132 +0,0 @@
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
extern "C" {
#include "network.h"
#include "detection_layer.h"
#include "cost_layer.h"
#include "utils.h"
#include "parser.h"
#include "box.h"
#include "image.h"
#include <sys/time.h>
}
#ifdef OPENCV
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
extern "C" image ipl_to_image(IplImage* src);
extern "C" void convert_yolo_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
extern "C" char *voc_names[];
extern "C" image voc_labels[];
// Shared state for the demo; written and read by fetch_in_thread,
// detect_in_thread and demo_yolo below.
// Per-box class scores; rows are allocated in demo_yolo.
static float **probs;
// Per-box coordinates; allocated in demo_yolo.
static box *boxes;
// Network parsed/loaded in demo_yolo.
static network net;
// Most recently captured frame (set by fetch_in_thread).
static image in ;
// `in` resized to the network input size (set by fetch_in_thread).
static image in_s ;
// Frame that detections are drawn onto (used by detect_in_thread).
static image det ;
// Resized frame fed to the network; freed by detect_in_thread.
static image det_s;
// Frame shown on screen by demo_yolo.
static image disp ;
// Capture source opened in demo_yolo.
static cv::VideoCapture cap;
// Exponentially smoothed frames-per-second estimate.
static float fps = 0;
// Detection threshold copied from demo_yolo's `thresh` argument.
static float demo_thresh = 0;
void *fetch_in_thread(void *ptr)
{
cv::Mat frame_m;
cap >> frame_m;
IplImage frame = frame_m;
in = ipl_to_image(&frame);
rgbgr_image(in);
in_s = resize_image(in, net.w, net.h);
return 0;
}
// Thread entry: run the network on `det_s`, convert the raw output into
// boxes/probabilities, apply non-max suppression, print the FPS banner and
// draw the detections onto `det`. Frees `det_s`. The argument is unused;
// always returns 0.
void *detect_in_thread(void *ptr)
{
    const float nms = .4;
    detection_layer l = net.layers[net.n-1];   // last layer is used as the detection layer
    const int total = l.side*l.side*l.n;       // number of candidate boxes

    float *predictions = network_predict(net, det_s.data);
    free_image(det_s);

    convert_yolo_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, demo_thresh, probs, boxes, 0);
    if (nms > 0) {
        do_nms(boxes, probs, total, l.classes, nms);
    }

    // ANSI escapes: clear the screen, then move the cursor to the top-left.
    printf("\033[2J");
    printf("\033[1;1H");
    printf("\nFPS:%.0f\n",fps);
    printf("Objects:\n\n");

    draw_detections(det, total, demo_thresh, boxes, probs, voc_names, voc_labels, 20);
    return 0;
}
// Live webcam demo.
//
// cfgfile:    network configuration to parse.
// weightfile: optional weights to load (skipped when null).
// thresh:     detection threshold used for filtering and drawing.
// cam_index:  index of the capture device to open.
//
// Capture and detection run in two threads per iteration while the previous
// result is displayed; the three image slots (in -> det -> disp) are rotated
// after both threads have been joined. The loop never exits.
extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index)
{
    demo_thresh = thresh;
    printf("YOLO demo\n");
    net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);

    srand(2222222);

    cv::VideoCapture cam(cam_index);
    cap = cam;
    if(!cap.isOpened()) error("Couldn't connect to webcam.\n");

    detection_layer l = net.layers[net.n-1];
    const int total = l.side*l.side*l.n;   // number of candidate boxes
    int j;

    boxes = (box *)calloc(total, sizeof(box));
    probs = (float **)calloc(total, sizeof(float *));
    // Each row holds l.classes floats; the element size is sizeof(float),
    // not sizeof(float *) as previously written.
    for(j = 0; j < total; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float));

    pthread_t fetch_thread;
    pthread_t detect_thread;

    // Prime the pipeline: fetch two frames and run one detection so that
    // disp/det/in are all populated before the loop starts.
    fetch_in_thread(0);
    det = in;
    det_s = in_s;

    fetch_in_thread(0);
    detect_in_thread(0);
    disp = det;
    det = in;
    det_s = in_s;

    cvNamedWindow("YOLO", CV_WINDOW_NORMAL);
    cvMoveWindow("YOLO", 0, 0);
    cvResizeWindow("YOLO", 1352, 1013);

    while(1){
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);
        if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
        if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
        show_image(disp, "YOLO");
        free_image(disp);
        cvWaitKey(1);
        pthread_join(fetch_thread, 0);
        pthread_join(detect_thread, 0);

        // Rotate the slots: the frame just annotated becomes the one shown.
        disp  = det;
        det   = in;
        det_s = in_s;

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        // Use the full elapsed time (seconds and microseconds); using tv_usec
        // alone is wrong for iterations of a second or longer and divides by
        // zero when tv_usec happens to be 0.
        const double elapsed_us = tval_result.tv_sec*1000000.0 + tval_result.tv_usec;
        if(elapsed_us > 0){
            float curr = 1000000.f/elapsed_us;
            fps = .9*fps + .1*curr;   // exponential moving average
        }
    }
}
#else
// Fallback used when the file is built without OPENCV: the demo cannot run,
// so only a notice is written to stderr. All arguments are ignored.
extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index)
{
    fputs("YOLO demo needs OpenCV for webcam images.\n", stderr);
}
#endif