mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
So I have this new programming paradigm.......
This commit is contained in:
parent
c899cc19f4
commit
481b57a96a
4
Makefile
4
Makefile
@ -41,10 +41,10 @@ CFLAGS+= -DCUDNN
|
||||
LDFLAGS+= -lcudnn
|
||||
endif
|
||||
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
|
||||
OBJ=gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
|
||||
ifeq ($(GPU), 1)
|
||||
LDFLAGS+= -lstdc++
|
||||
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
|
||||
OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
|
||||
endif
|
||||
|
||||
OBJS = $(addprefix $(OBJDIR), $(OBJ))
|
||||
|
@ -1,6 +1,19 @@
|
||||
import os
|
||||
import string
|
||||
import pipes
|
||||
|
||||
l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
|
||||
#l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
|
||||
|
||||
l = string.printable
|
||||
|
||||
for word in l:
|
||||
os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word))
|
||||
#os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word))
|
||||
if word == ' ':
|
||||
os.system('convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:"\ " 32.png')
|
||||
elif word == '\\':
|
||||
os.system('convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:"\\\\\\\\" 92.png')
|
||||
elif ord(word) in [9,10,11,12,13,14]:
|
||||
pass
|
||||
else:
|
||||
os.system("convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:%s \"%d.png\""%(pipes.quote(word), ord(word)))
|
||||
|
||||
|
@ -21,7 +21,12 @@ layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
|
||||
l.output = calloc(batch*inputs, sizeof(float*));
|
||||
l.delta = calloc(batch*inputs, sizeof(float*));
|
||||
|
||||
l.forward = forward_activation_layer;
|
||||
l.backward = backward_activation_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_activation_layer_gpu;
|
||||
l.backward_gpu = backward_activation_layer_gpu;
|
||||
|
||||
l.output_gpu = cuda_make_array(l.output, inputs*batch);
|
||||
l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
|
||||
#endif
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
image get_image_from_stream(CvCapture *cap);
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -19,7 +19,11 @@ avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
|
||||
int output_size = l.outputs * batch;
|
||||
l.output = calloc(output_size, sizeof(float));
|
||||
l.delta = calloc(output_size, sizeof(float));
|
||||
l.forward = forward_avgpool_layer;
|
||||
l.backward = backward_avgpool_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_avgpool_layer_gpu;
|
||||
l.backward_gpu = backward_avgpool_layer_gpu;
|
||||
l.output_gpu = cuda_make_array(l.output, output_size);
|
||||
l.delta_gpu = cuda_make_array(l.delta, output_size);
|
||||
#endif
|
||||
|
@ -28,7 +28,13 @@ layer make_batchnorm_layer(int batch, int w, int h, int c)
|
||||
|
||||
layer.rolling_mean = calloc(c, sizeof(float));
|
||||
layer.rolling_variance = calloc(c, sizeof(float));
|
||||
|
||||
layer.forward = forward_batchnorm_layer;
|
||||
layer.backward = backward_batchnorm_layer;
|
||||
#ifdef GPU
|
||||
layer.forward_gpu = forward_batchnorm_layer_gpu;
|
||||
layer.backward_gpu = backward_batchnorm_layer_gpu;
|
||||
|
||||
layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
|
||||
layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
image get_image_from_stream(CvCapture *cap);
|
||||
#endif
|
||||
|
||||
list *read_data_cfg(char *filename)
|
||||
@ -57,25 +58,26 @@ void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int
|
||||
#ifdef GPU
|
||||
int i;
|
||||
|
||||
srand(time(0));
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
printf("%d\n", ngpus);
|
||||
network *nets = calloc(ngpus, sizeof(network));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
srand(seed);
|
||||
cuda_set_device(gpus[i]);
|
||||
nets[i] = parse_network_cfg(cfgfile);
|
||||
if(clear) *nets[i].seen = 0;
|
||||
if(weightfile){
|
||||
load_weights(&nets[i], weightfile);
|
||||
}
|
||||
}
|
||||
network net = nets[0];
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
*nets[i].seen = *net.seen;
|
||||
if(clear) *nets[i].seen = 0;
|
||||
nets[i].learning_rate *= ngpus;
|
||||
}
|
||||
srand(time(0));
|
||||
network net = nets[0];
|
||||
|
||||
int imgs = net.batch * net.subdivisions * ngpus;
|
||||
|
||||
|
40
src/coco.c
40
src/coco.c
@ -12,14 +12,10 @@
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#endif
|
||||
|
||||
void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
||||
|
||||
/* Class-name table for the COCO detector. In this file it is indexed 0..79 to
 * build label image paths ("data/labels/<name>.png") and is passed as the
 * names array to draw_detections() and demo(). */
char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};
|
||||
|
||||
/* Integer ids, 80 entries, increasing but not contiguous (12, 26, 29, 30, 45,
 * 66, 68, 69, 71 and 83 are absent).
 * NOTE(review): presumably coco_ids[i] is the dataset category id for
 * coco_classes[i]; the code that reads this table is not visible here - confirm. */
int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
|
||||
|
||||
/* One label image per class. run_coco() fills entry i by loading
 * "data/labels/<coco_classes[i]>.png"; the array is then handed to
 * draw_detections() and demo() as the label set. */
image coco_labels[80];
|
||||
|
||||
void train_coco(char *cfgfile, char *weightfile)
|
||||
{
|
||||
//char *train_images = "/home/pjreddie/data/voc/test/train.txt";
|
||||
@ -160,7 +156,6 @@ void validate_coco(char *cfgfile, char *weightfile)
|
||||
|
||||
layer l = net.layers[net.n-1];
|
||||
int classes = l.classes;
|
||||
int square = l.sqrt;
|
||||
int side = l.side;
|
||||
|
||||
int j;
|
||||
@ -217,10 +212,10 @@ void validate_coco(char *cfgfile, char *weightfile)
|
||||
char *path = paths[i+t-nthreads];
|
||||
int image_id = get_coco_image_id(path);
|
||||
float *X = val_resized[t].data;
|
||||
float *predictions = network_predict(net, X);
|
||||
network_predict(net, X);
|
||||
int w = val[t].w;
|
||||
int h = val[t].h;
|
||||
convert_detections(predictions, classes, l.n, square, side, w, h, thresh, probs, boxes, 0);
|
||||
get_detection_boxes(l, w, h, thresh, probs, boxes, 0);
|
||||
if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, iou_thresh);
|
||||
print_cocos(fp, image_id, boxes, probs, side*side*l.n, classes, w, h);
|
||||
free_image(val[t]);
|
||||
@ -250,7 +245,6 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
|
||||
|
||||
layer l = net.layers[net.n-1];
|
||||
int classes = l.classes;
|
||||
int square = l.sqrt;
|
||||
int side = l.side;
|
||||
|
||||
int j, k;
|
||||
@ -282,14 +276,15 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
|
||||
image orig = load_image_color(path, 0, 0);
|
||||
image sized = resize_image(orig, net.w, net.h);
|
||||
char *id = basecfg(path);
|
||||
float *predictions = network_predict(net, sized.data);
|
||||
convert_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1);
|
||||
network_predict(net, sized.data);
|
||||
get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1);
|
||||
if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh);
|
||||
|
||||
char *labelpath = find_replace(path, "images", "labels");
|
||||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
char labelpath[4096];
|
||||
find_replace(path, "images", "labels", labelpath);
|
||||
find_replace(labelpath, "JPEGImages", "labels", labelpath);
|
||||
find_replace(labelpath, ".jpg", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPEG", ".txt", labelpath);
|
||||
|
||||
int num_labels = 0;
|
||||
box_label *truth = read_boxes(labelpath, &num_labels);
|
||||
@ -323,7 +318,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
|
||||
|
||||
void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
{
|
||||
|
||||
image *alphabet = load_alphabet();
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
@ -353,11 +348,11 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
image sized = resize_image(im, net.w, net.h);
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
network_predict(net, X);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
|
||||
get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0);
|
||||
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
||||
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80);
|
||||
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, alphabet, 80);
|
||||
save_image(im, "prediction");
|
||||
show_image(im, "predictions");
|
||||
free_image(im);
|
||||
@ -372,12 +367,7 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
|
||||
void run_coco(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < 80; ++i){
|
||||
char buff[256];
|
||||
sprintf(buff, "data/labels/%s.png", coco_classes[i]);
|
||||
coco_labels[i] = load_image_color(buff, 0, 0);
|
||||
}
|
||||
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
|
||||
float thresh = find_float_arg(argc, argv, "-thresh", .2);
|
||||
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||
int frame_skip = find_int_arg(argc, argv, "-s", 0);
|
||||
@ -394,5 +384,5 @@ void run_coco(int argc, char **argv)
|
||||
else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, coco_labels, 80, frame_skip);
|
||||
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix);
|
||||
}
|
||||
|
@ -36,6 +36,10 @@ connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVAT
|
||||
l.weights = calloc(outputs*inputs, sizeof(float));
|
||||
l.biases = calloc(outputs, sizeof(float));
|
||||
|
||||
l.forward = forward_connected_layer;
|
||||
l.backward = backward_connected_layer;
|
||||
l.update = update_connected_layer;
|
||||
|
||||
//float scale = 1./sqrt(inputs);
|
||||
float scale = sqrt(2./inputs);
|
||||
for(i = 0; i < outputs*inputs; ++i){
|
||||
@ -66,6 +70,10 @@ connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVAT
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_connected_layer_gpu;
|
||||
l.backward_gpu = backward_connected_layer_gpu;
|
||||
l.update_gpu = update_connected_layer_gpu;
|
||||
|
||||
l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
|
||||
l.biases_gpu = cuda_make_array(l.biases, outputs);
|
||||
|
||||
|
@ -209,6 +209,9 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
||||
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
|
||||
l.forward = forward_convolutional_layer;
|
||||
l.backward = backward_convolutional_layer;
|
||||
l.update = update_convolutional_layer;
|
||||
if(binary){
|
||||
l.binary_weights = calloc(c*n*size*size, sizeof(float));
|
||||
l.cweights = calloc(c*n*size*size, sizeof(char));
|
||||
@ -234,6 +237,10 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_convolutional_layer_gpu;
|
||||
l.backward_gpu = backward_convolutional_layer_gpu;
|
||||
l.update_gpu = update_convolutional_layer_gpu;
|
||||
|
||||
if(gpu_index >= 0){
|
||||
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
|
||||
|
@ -43,7 +43,13 @@ cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float sca
|
||||
l.delta = calloc(inputs*batch, sizeof(float));
|
||||
l.output = calloc(inputs*batch, sizeof(float));
|
||||
l.cost = calloc(1, sizeof(float));
|
||||
|
||||
l.forward = forward_cost_layer;
|
||||
l.backward = backward_cost_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_cost_layer_gpu;
|
||||
l.backward_gpu = backward_cost_layer_gpu;
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.output, inputs*batch);
|
||||
l.output_gpu = cuda_make_array(l.delta, inputs*batch);
|
||||
#endif
|
||||
|
@ -64,7 +64,15 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
|
||||
l.output = l.output_layer->output;
|
||||
l.delta = l.output_layer->delta;
|
||||
|
||||
l.forward = forward_crnn_layer;
|
||||
l.backward = backward_crnn_layer;
|
||||
l.update = update_crnn_layer;
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_crnn_layer_gpu;
|
||||
l.backward_gpu = backward_crnn_layer_gpu;
|
||||
l.update_gpu = update_crnn_layer_gpu;
|
||||
|
||||
l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
|
||||
l.output_gpu = l.output_layer->output_gpu;
|
||||
l.delta_gpu = l.output_layer->delta_gpu;
|
||||
|
@ -10,6 +10,9 @@ image get_crop_image(crop_layer l)
|
||||
return float_to_image(w,h,c,l.output);
|
||||
}
|
||||
|
||||
/* Backward hook installed as l.backward by make_crop_layer(). The body does
 * nothing: neither the layer nor the network state is read or written. */
void backward_crop_layer(const crop_layer l, network_state state)
{
    (void)l;
    (void)state;
}
|
||||
/* Backward hook installed as l.backward_gpu by make_crop_layer() in GPU
 * builds. The body does nothing: neither argument is read or written. */
void backward_crop_layer_gpu(const crop_layer l, network_state state)
{
    (void)l;
    (void)state;
}
|
||||
|
||||
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
|
||||
{
|
||||
fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
|
||||
@ -30,7 +33,12 @@ crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
l.outputs = l.out_w * l.out_h * l.out_c;
|
||||
l.output = calloc(l.outputs*batch, sizeof(float));
|
||||
l.forward = forward_crop_layer;
|
||||
l.backward = backward_crop_layer;
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_crop_layer_gpu;
|
||||
l.backward_gpu = backward_crop_layer_gpu;
|
||||
l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
|
||||
l.rand_gpu = cuda_make_array(0, l.batch*8);
|
||||
#endif
|
||||
|
@ -136,17 +136,6 @@ void partial(char *cfgfile, char *weightfile, char *outfile, int max)
|
||||
save_weights_upto(net, outfile, max);
|
||||
}
|
||||
|
||||
void stacked(char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
gpu_index = -1;
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
net.seen = 0;
|
||||
save_weights_double(net, outfile);
|
||||
}
|
||||
|
||||
#include "convolutional_layer.h"
|
||||
void rescale_net(char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
@ -420,8 +409,6 @@ int main(int argc, char **argv)
|
||||
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
|
||||
} else if (0 == strcmp(argv[1], "average")){
|
||||
average(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "stacked")){
|
||||
stacked(argv[2], argv[3], argv[4]);
|
||||
} else if (0 == strcmp(argv[1], "visualize")){
|
||||
visualize(argv[2], (argc > 3) ? argv[3] : 0);
|
||||
} else if (0 == strcmp(argv[1], "imtest")){
|
||||
|
60
src/data.c
60
src/data.c
@ -47,7 +47,7 @@ char **get_random_paths(char **paths, int n, int m)
|
||||
for(i = 0; i < n; ++i){
|
||||
int index = rand()%m;
|
||||
random_paths[i] = paths[index];
|
||||
if(i == 0) printf("%s\n", paths[index]);
|
||||
//if(i == 0) printf("%s\n", paths[index]);
|
||||
}
|
||||
pthread_mutex_unlock(&mutex);
|
||||
return random_paths;
|
||||
@ -58,7 +58,8 @@ char **find_replace_paths(char **paths, int n, char *find, char *replace)
|
||||
char **replace_paths = calloc(n, sizeof(char*));
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
char *replaced = find_replace(paths[i], find, replace);
|
||||
char replaced[4096];
|
||||
find_replace(paths[i], find, replace, replaced);
|
||||
replace_paths[i] = copy_string(replaced);
|
||||
}
|
||||
return replace_paths;
|
||||
@ -198,12 +199,13 @@ void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float
|
||||
|
||||
void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
|
||||
{
|
||||
char *labelpath = find_replace(path, "images", "labels");
|
||||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
char labelpath[4096];
|
||||
find_replace(path, "images", "labels", labelpath);
|
||||
find_replace(labelpath, "JPEGImages", "labels", labelpath);
|
||||
find_replace(labelpath, ".jpg", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPG", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPEG", ".txt", labelpath);
|
||||
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPG", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
int count = 0;
|
||||
box_label *boxes = read_boxes(labelpath, &count);
|
||||
randomize_boxes(boxes, count);
|
||||
@ -235,13 +237,14 @@ void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx,
|
||||
|
||||
void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy)
|
||||
{
|
||||
char *labelpath = find_replace(path, "images", "labels");
|
||||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
char labelpath[4096];
|
||||
find_replace(path, "images", "labels", labelpath);
|
||||
find_replace(labelpath, "JPEGImages", "labels", labelpath);
|
||||
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".png", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPG", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
find_replace(labelpath, ".jpg", ".txt", labelpath);
|
||||
find_replace(labelpath, ".png", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPG", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPEG", ".txt", labelpath);
|
||||
int count = 0;
|
||||
box_label *boxes = read_boxes(labelpath, &count);
|
||||
randomize_boxes(boxes, count);
|
||||
@ -282,13 +285,14 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
|
||||
|
||||
void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
|
||||
{
|
||||
char *labelpath = find_replace(path, "images", "labels");
|
||||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
char labelpath[4096];
|
||||
find_replace(path, "images", "labels", labelpath);
|
||||
find_replace(labelpath, "JPEGImages", "labels", labelpath);
|
||||
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".png", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPG", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
find_replace(labelpath, ".jpg", ".txt", labelpath);
|
||||
find_replace(labelpath, ".png", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPG", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPEG", ".txt", labelpath);
|
||||
int count = 0;
|
||||
box_label *boxes = read_boxes(labelpath, &count);
|
||||
randomize_boxes(boxes, count);
|
||||
@ -400,11 +404,12 @@ matrix load_tags_paths(char **paths, int n, int k)
|
||||
int i;
|
||||
int count = 0;
|
||||
for(i = 0; i < n; ++i){
|
||||
char *label = find_replace(paths[i], "imgs", "labels");
|
||||
label = find_replace(label, "_iconl.jpeg", ".txt");
|
||||
char label[4096];
|
||||
find_replace(paths[i], "imgs", "labels", label);
|
||||
find_replace(label, "_iconl.jpeg", ".txt", label);
|
||||
FILE *file = fopen(label, "r");
|
||||
if(!file){
|
||||
label = find_replace(label, "labels", "labels2");
|
||||
find_replace(label, "labels", "labels2", label);
|
||||
file = fopen(label, "r");
|
||||
if(!file) continue;
|
||||
}
|
||||
@ -518,16 +523,18 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
|
||||
int id;
|
||||
float iou;
|
||||
|
||||
char *imlabel1 = find_replace(paths[i*2], "imgs", "labels");
|
||||
imlabel1 = find_replace(imlabel1, "jpg", "txt");
|
||||
char imlabel1[4096];
|
||||
char imlabel2[4096];
|
||||
find_replace(paths[i*2], "imgs", "labels", imlabel1);
|
||||
find_replace(imlabel1, "jpg", "txt", imlabel1);
|
||||
FILE *fp1 = fopen(imlabel1, "r");
|
||||
|
||||
while(fscanf(fp1, "%d %f", &id, &iou) == 2){
|
||||
if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou;
|
||||
}
|
||||
|
||||
char *imlabel2 = find_replace(paths[i*2+1], "imgs", "labels");
|
||||
imlabel2 = find_replace(imlabel2, "jpg", "txt");
|
||||
find_replace(paths[i*2+1], "imgs", "labels", imlabel2);
|
||||
find_replace(imlabel2, "jpg", "txt", imlabel2);
|
||||
FILE *fp2 = fopen(imlabel2, "r");
|
||||
|
||||
while(fscanf(fp2, "%d %f", &id, &iou) == 2){
|
||||
@ -709,6 +716,7 @@ void *load_threads(void *ptr)
|
||||
{
|
||||
int i;
|
||||
load_args args = *(load_args *)ptr;
|
||||
if (args.threads == 0) args.threads = 1;
|
||||
data *out = args.d;
|
||||
int total = args.n;
|
||||
free(ptr);
|
||||
|
@ -80,6 +80,10 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
|
||||
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
|
||||
l.forward = forward_deconvolutional_layer;
|
||||
l.backward = backward_deconvolutional_layer;
|
||||
l.update = update_deconvolutional_layer;
|
||||
|
||||
#ifdef GPU
|
||||
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
|
||||
|
47
src/demo.c
47
src/demo.c
@ -1,5 +1,6 @@
|
||||
#include "network.h"
|
||||
#include "detection_layer.h"
|
||||
#include "region_layer.h"
|
||||
#include "cost_layer.h"
|
||||
#include "utils.h"
|
||||
#include "parser.h"
|
||||
@ -13,10 +14,10 @@
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#include "opencv2/imgproc/imgproc_c.h"
|
||||
void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
||||
image get_image_from_stream(CvCapture *cap);
|
||||
|
||||
static char **demo_names;
|
||||
static image *demo_labels;
|
||||
static image *demo_alphabet;
|
||||
static int demo_classes;
|
||||
|
||||
static float **probs;
|
||||
@ -50,16 +51,23 @@ void *detect_in_thread(void *ptr)
|
||||
{
|
||||
float nms = .4;
|
||||
|
||||
detection_layer l = net.layers[net.n-1];
|
||||
layer l = net.layers[net.n-1];
|
||||
float *X = det_s.data;
|
||||
float *prediction = network_predict(net, X);
|
||||
|
||||
memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
|
||||
mean_arrays(predictions, FRAMES, l.outputs, avg);
|
||||
l.output = avg;
|
||||
|
||||
free_image(det_s);
|
||||
convert_detections(avg, l.classes, l.n, l.sqrt, l.side, 1, 1, demo_thresh, probs, boxes, 0);
|
||||
if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
||||
if(l.type == DETECTION){
|
||||
get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
|
||||
} else if (l.type == REGION){
|
||||
get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
|
||||
} else {
|
||||
error("Last layer must produce detections\n");
|
||||
}
|
||||
if (nms > 0) do_nms(boxes, probs, l.w*l.h*l.n, l.classes, nms);
|
||||
printf("\033[2J");
|
||||
printf("\033[1;1H");
|
||||
printf("\nFPS:%.1f\n",fps);
|
||||
@ -69,7 +77,7 @@ void *detect_in_thread(void *ptr)
|
||||
det = images[(demo_index + FRAMES/2 + 1)%FRAMES];
|
||||
demo_index = (demo_index + 1)%FRAMES;
|
||||
|
||||
draw_detections(det, l.side*l.side*l.n, demo_thresh, boxes, probs, demo_names, demo_labels, demo_classes);
|
||||
draw_detections(det, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -83,12 +91,13 @@ double get_wall_time()
|
||||
return (double)time.tv_sec + (double)time.tv_usec * .000001;
|
||||
}
|
||||
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip)
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix)
|
||||
{
|
||||
//skip = frame_skip;
|
||||
image *alphabet = load_alphabet();
|
||||
int delay = frame_skip;
|
||||
demo_names = names;
|
||||
demo_labels = labels;
|
||||
demo_alphabet = alphabet;
|
||||
demo_classes = classes;
|
||||
demo_thresh = thresh;
|
||||
printf("Demo\n");
|
||||
@ -108,16 +117,16 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
||||
|
||||
if(!cap) error("Couldn't connect to webcam.\n");
|
||||
|
||||
detection_layer l = net.layers[net.n-1];
|
||||
layer l = net.layers[net.n-1];
|
||||
int j;
|
||||
|
||||
avg = (float *) calloc(l.outputs, sizeof(float));
|
||||
for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
|
||||
for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3);
|
||||
|
||||
boxes = (box *)calloc(l.side*l.side*l.n, sizeof(box));
|
||||
probs = (float **)calloc(l.side*l.side*l.n, sizeof(float *));
|
||||
for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *));
|
||||
boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box));
|
||||
probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *));
|
||||
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *));
|
||||
|
||||
pthread_t fetch_thread;
|
||||
pthread_t detect_thread;
|
||||
@ -141,9 +150,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
cvNamedWindow("Demo", CV_WINDOW_NORMAL);
|
||||
cvMoveWindow("Demo", 0, 0);
|
||||
cvResizeWindow("Demo", 1352, 1013);
|
||||
if(!prefix){
|
||||
cvNamedWindow("Demo", CV_WINDOW_NORMAL);
|
||||
cvMoveWindow("Demo", 0, 0);
|
||||
cvResizeWindow("Demo", 1352, 1013);
|
||||
}
|
||||
|
||||
double before = get_wall_time();
|
||||
|
||||
@ -153,7 +164,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
||||
if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
|
||||
if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
|
||||
|
||||
if(1){
|
||||
if(!prefix){
|
||||
show_image(disp, "Demo");
|
||||
int c = cvWaitKey(1);
|
||||
if (c == 10){
|
||||
@ -164,7 +175,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
||||
}
|
||||
}else{
|
||||
char buff[256];
|
||||
sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count);
|
||||
sprintf(buff, "%s_%08d", prefix, count);
|
||||
save_image(disp, buff);
|
||||
}
|
||||
|
||||
@ -201,7 +212,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
||||
}
|
||||
}
|
||||
#else
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip)
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix)
|
||||
{
|
||||
fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
|
||||
}
|
||||
|
@ -2,6 +2,6 @@
|
||||
#define DEMO
|
||||
|
||||
#include "image.h"
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip);
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix);
|
||||
|
||||
#endif
|
||||
|
@ -30,7 +30,12 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
|
||||
l.truths = l.side*l.side*(1+l.coords+l.classes);
|
||||
l.output = calloc(batch*l.outputs, sizeof(float));
|
||||
l.delta = calloc(batch*l.outputs, sizeof(float));
|
||||
|
||||
l.forward = forward_detection_layer;
|
||||
l.backward = backward_detection_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_detection_layer_gpu;
|
||||
l.backward_gpu = backward_detection_layer_gpu;
|
||||
l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
|
||||
l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
|
||||
#endif
|
||||
@ -216,6 +221,35 @@ void backward_detection_layer(const detection_layer l, network_state state)
|
||||
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
|
||||
}
|
||||
|
||||
void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
|
||||
{
|
||||
int i,j,n;
|
||||
float *predictions = l.output;
|
||||
//int per_cell = 5*num+classes;
|
||||
for (i = 0; i < l.side*l.side; ++i){
|
||||
int row = i / l.side;
|
||||
int col = i % l.side;
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = i*l.n + n;
|
||||
int p_index = l.side*l.side*l.classes + i*l.n + n;
|
||||
float scale = predictions[p_index];
|
||||
int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4;
|
||||
boxes[index].x = (predictions[box_index + 0] + col) / l.side * w;
|
||||
boxes[index].y = (predictions[box_index + 1] + row) / l.side * h;
|
||||
boxes[index].w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w;
|
||||
boxes[index].h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h;
|
||||
for(j = 0; j < l.classes; ++j){
|
||||
int class_index = i*l.classes;
|
||||
float prob = scale*predictions[class_index+j];
|
||||
probs[index][j] = (prob > thresh) ? prob : 0;
|
||||
}
|
||||
if(only_objectness){
|
||||
probs[index][0] = scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
void forward_detection_layer_gpu(const detection_layer l, network_state state)
|
||||
|
@ -9,6 +9,7 @@ typedef layer detection_layer;
|
||||
detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
|
||||
void forward_detection_layer(const detection_layer l, network_state state);
|
||||
void backward_detection_layer(const detection_layer l, network_state state);
|
||||
void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
||||
|
||||
#ifdef GPU
|
||||
void forward_detection_layer_gpu(const detection_layer l, network_state state);
|
||||
|
121
src/detector.c
121
src/detector.c
@ -1,16 +1,16 @@
|
||||
#include "network.h"
|
||||
#include "detection_layer.h"
|
||||
#include "region_layer.h"
|
||||
#include "cost_layer.h"
|
||||
#include "utils.h"
|
||||
#include "parser.h"
|
||||
#include "box.h"
|
||||
#include "demo.h"
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#endif
|
||||
|
||||
static char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
|
||||
static image voc_labels[20];
|
||||
|
||||
void train_detector(char *cfgfile, char *weightfile)
|
||||
{
|
||||
@ -49,13 +49,14 @@ void train_detector(char *cfgfile, char *weightfile)
|
||||
args.num_boxes = l.max_boxes;
|
||||
args.d = &buffer;
|
||||
args.type = DETECTION_DATA;
|
||||
args.threads = 4;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
pthread_t load_thread = load_data(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
while(get_current_batch(net) < net.max_batches){
|
||||
@ -63,7 +64,7 @@ void train_detector(char *cfgfile, char *weightfile)
|
||||
time=clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data_in_thread(args);
|
||||
load_thread = load_data(args);
|
||||
|
||||
/*
|
||||
int k;
|
||||
@ -102,44 +103,6 @@ void train_detector(char *cfgfile, char *weightfile)
|
||||
save_weights(net, buff);
|
||||
}
|
||||
|
||||
static void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
|
||||
{
|
||||
int i,j,n;
|
||||
//int per_cell = 5*num+classes;
|
||||
for (i = 0; i < side*side; ++i){
|
||||
int row = i / side;
|
||||
int col = i % side;
|
||||
for(n = 0; n < num; ++n){
|
||||
int index = i*num + n;
|
||||
int p_index = index * (classes + 5) + 4;
|
||||
float scale = predictions[p_index];
|
||||
int box_index = index * (classes + 5);
|
||||
boxes[index].x = (predictions[box_index + 0] + col + .5) / side * w;
|
||||
boxes[index].y = (predictions[box_index + 1] + row + .5) / side * h;
|
||||
if(0){
|
||||
boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / side * w;
|
||||
boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / side * h;
|
||||
}
|
||||
boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (square?2:1)) * w;
|
||||
boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (square?2:1)) * h;
|
||||
if(1){
|
||||
boxes[index].x = ((col + .5)/side + predictions[box_index + 0] * .5) * w;
|
||||
boxes[index].y = ((row + .5)/side + predictions[box_index + 1] * .5) * h;
|
||||
boxes[index].w = (exp(predictions[box_index + 2]) * .5) * w;
|
||||
boxes[index].h = (exp(predictions[box_index + 3]) * .5) * h;
|
||||
}
|
||||
for(j = 0; j < classes; ++j){
|
||||
int class_index = index * (classes + 5) + 5;
|
||||
float prob = scale*predictions[class_index+j];
|
||||
probs[index][j] = (prob > thresh) ? prob : 0;
|
||||
}
|
||||
if(only_objectness){
|
||||
probs[index][0] = scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void print_detector_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h)
|
||||
{
|
||||
int i, j;
|
||||
@ -179,7 +142,6 @@ void validate_detector(char *cfgfile, char *weightfile)
|
||||
|
||||
layer l = net.layers[net.n-1];
|
||||
int classes = l.classes;
|
||||
int side = l.w;
|
||||
|
||||
int j;
|
||||
FILE **fps = calloc(classes, sizeof(FILE *));
|
||||
@ -188,9 +150,9 @@ void validate_detector(char *cfgfile, char *weightfile)
|
||||
snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
|
||||
fps[j] = fopen(buff, "w");
|
||||
}
|
||||
box *boxes = calloc(side*side*l.n, sizeof(box));
|
||||
float **probs = calloc(side*side*l.n, sizeof(float *));
|
||||
for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
||||
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
|
||||
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
|
||||
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
@ -235,12 +197,12 @@ void validate_detector(char *cfgfile, char *weightfile)
|
||||
char *path = paths[i+t-nthreads];
|
||||
char *id = basecfg(path);
|
||||
float *X = val_resized[t].data;
|
||||
float *predictions = network_predict(net, X);
|
||||
network_predict(net, X);
|
||||
int w = val[t].w;
|
||||
int h = val[t].h;
|
||||
convert_detections(predictions, classes, l.n, 0, side, w, h, thresh, probs, boxes, 0);
|
||||
if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, nms);
|
||||
print_detector_detections(fps, id, boxes, probs, side*side*l.n, classes, w, h);
|
||||
get_region_boxes(l, w, h, thresh, probs, boxes, 0);
|
||||
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
|
||||
print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h);
|
||||
free(id);
|
||||
free_image(val[t]);
|
||||
free_image(val_resized[t]);
|
||||
@ -268,8 +230,6 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
||||
|
||||
layer l = net.layers[net.n-1];
|
||||
int classes = l.classes;
|
||||
int square = l.sqrt;
|
||||
int side = l.side;
|
||||
|
||||
int j, k;
|
||||
FILE **fps = calloc(classes, sizeof(FILE *));
|
||||
@ -278,9 +238,9 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
||||
snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
|
||||
fps[j] = fopen(buff, "w");
|
||||
}
|
||||
box *boxes = calloc(side*side*l.n, sizeof(box));
|
||||
float **probs = calloc(side*side*l.n, sizeof(float *));
|
||||
for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
||||
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
|
||||
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
|
||||
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
@ -299,18 +259,19 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
||||
image orig = load_image_color(path, 0, 0);
|
||||
image sized = resize_image(orig, net.w, net.h);
|
||||
char *id = basecfg(path);
|
||||
float *predictions = network_predict(net, sized.data);
|
||||
convert_detections(predictions, classes, l.n, square, l.w, 1, 1, thresh, probs, boxes, 1);
|
||||
if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms);
|
||||
network_predict(net, sized.data);
|
||||
get_region_boxes(l, 1, 1, thresh, probs, boxes, 1);
|
||||
if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);
|
||||
|
||||
char *labelpath = find_replace(path, "images", "labels");
|
||||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
char labelpath[4096];
|
||||
find_replace(path, "images", "labels", labelpath);
|
||||
find_replace(labelpath, "JPEGImages", "labels", labelpath);
|
||||
find_replace(labelpath, ".jpg", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPEG", ".txt", labelpath);
|
||||
|
||||
int num_labels = 0;
|
||||
box_label *truth = read_boxes(labelpath, &num_labels);
|
||||
for(k = 0; k < side*side*l.n; ++k){
|
||||
for(k = 0; k < l.w*l.h*l.n; ++k){
|
||||
if(probs[k][0] > thresh){
|
||||
++proposals;
|
||||
}
|
||||
@ -319,7 +280,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
||||
++total;
|
||||
box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
|
||||
float best_iou = 0;
|
||||
for(k = 0; k < side*side*l.n; ++k){
|
||||
for(k = 0; k < l.w*l.h*l.n; ++k){
|
||||
float iou = box_iou(boxes[k], t);
|
||||
if(probs[k][0] > thresh && iou > best_iou){
|
||||
best_iou = iou;
|
||||
@ -340,13 +301,12 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
||||
|
||||
void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
{
|
||||
|
||||
image *alphabet = load_alphabet();
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
detection_layer l = net.layers[net.n-1];
|
||||
l.side = l.w;
|
||||
layer l = net.layers[net.n-1];
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
clock_t time;
|
||||
@ -354,9 +314,9 @@ void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh
|
||||
char *input = buff;
|
||||
int j;
|
||||
float nms=.4;
|
||||
box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
|
||||
float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
|
||||
for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
|
||||
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
|
||||
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
|
||||
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
|
||||
while(1){
|
||||
if(filename){
|
||||
strncpy(input, filename, 256);
|
||||
@ -371,12 +331,12 @@ void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh
|
||||
image sized = resize_image(im, net.w, net.h);
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
network_predict(net, X);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
convert_detections(predictions, l.classes, l.n, 0, l.w, 1, 1, thresh, probs, boxes, 0);
|
||||
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
||||
//draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
|
||||
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
|
||||
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0);
|
||||
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
|
||||
//draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
|
||||
draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
|
||||
save_image(im, "predictions");
|
||||
show_image(im, "predictions");
|
||||
|
||||
@ -392,14 +352,10 @@ void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh
|
||||
|
||||
void run_detector(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < 20; ++i){
|
||||
char buff[256];
|
||||
sprintf(buff, "data/labels/%s.png", voc_names[i]);
|
||||
voc_labels[i] = load_image_color(buff, 0, 0);
|
||||
}
|
||||
|
||||
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
|
||||
float thresh = find_float_arg(argc, argv, "-thresh", .2);
|
||||
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||
int frame_skip = find_int_arg(argc, argv, "-s", 0);
|
||||
if(argc < 4){
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
@ -412,4 +368,5 @@ void run_detector(int argc, char **argv)
|
||||
else if(0==strcmp(argv[2], "train")) train_detector(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_detector(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix);
|
||||
}
|
||||
|
@ -15,7 +15,11 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
|
||||
l.batch = batch;
|
||||
l.rand = calloc(inputs*batch, sizeof(float));
|
||||
l.scale = 1./(1.-probability);
|
||||
l.forward = forward_dropout_layer;
|
||||
l.backward = backward_dropout_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_dropout_layer_gpu;
|
||||
l.backward_gpu = backward_dropout_layer_gpu;
|
||||
l.rand_gpu = cuda_make_array(l.rand, inputs*batch);
|
||||
#endif
|
||||
return l;
|
||||
|
@ -85,7 +85,15 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
|
||||
l.z_cpu = calloc(outputs*batch, sizeof(float));
|
||||
l.h_cpu = calloc(outputs*batch, sizeof(float));
|
||||
|
||||
l.forward = forward_gru_layer;
|
||||
l.backward = backward_gru_layer;
|
||||
l.update = update_gru_layer;
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_gru_layer_gpu;
|
||||
l.backward_gpu = backward_gru_layer_gpu;
|
||||
l.update_gpu = update_gru_layer_gpu;
|
||||
|
||||
l.forgot_state_gpu = cuda_make_array(l.output, batch*outputs);
|
||||
l.forgot_delta_gpu = cuda_make_array(l.output, batch*outputs);
|
||||
l.prev_state_gpu = cuda_make_array(l.output, batch*outputs);
|
||||
|
@ -1,24 +1,23 @@
|
||||
|
||||
#ifndef RNN_LAYER_H
|
||||
#define RNN_LAYER_H
|
||||
#ifndef GRU_LAYER_H
|
||||
#define GRU_LAYER_H
|
||||
|
||||
#include "activations.h"
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
#define USET
|
||||
|
||||
layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);
|
||||
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
|
||||
|
||||
void forward_rnn_layer(layer l, network_state state);
|
||||
void backward_rnn_layer(layer l, network_state state);
|
||||
void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
void forward_gru_layer(layer l, network_state state);
|
||||
void backward_gru_layer(layer l, network_state state);
|
||||
void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
|
||||
#ifdef GPU
|
||||
void forward_rnn_layer_gpu(layer l, network_state state);
|
||||
void backward_rnn_layer_gpu(layer l, network_state state);
|
||||
void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
void push_rnn_layer(layer l);
|
||||
void pull_rnn_layer(layer l);
|
||||
void forward_gru_layer_gpu(layer l, network_state state);
|
||||
void backward_gru_layer_gpu(layer l, network_state state);
|
||||
void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
void push_gru_layer(layer l);
|
||||
void pull_gru_layer(layer l);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
190
src/image.c
190
src/image.c
@ -10,6 +10,12 @@
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb_image_write.h"
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#include "opencv2/imgproc/imgproc_c.h"
|
||||
#endif
|
||||
|
||||
|
||||
int windows = 0;
|
||||
|
||||
float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} };
|
||||
@ -25,10 +31,66 @@ float get_color(int c, int x, int max)
|
||||
return r;
|
||||
}
|
||||
|
||||
void composite_image(image source, image dest, int dx, int dy)
|
||||
{
|
||||
int x,y,k;
|
||||
for(k = 0; k < source.c; ++k){
|
||||
for(y = 0; y < source.h; ++y){
|
||||
for(x = 0; x < source.w; ++x){
|
||||
float val = get_pixel(source, x, y, k);
|
||||
float val2 = get_pixel_extend(dest, dx+x, dy+y, k);
|
||||
set_pixel(dest, dx+x, dy+y, k, val * val2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
image border_image(image a, int border)
|
||||
{
|
||||
image b = make_image(a.w + 2*border, a.h + 2*border, a.c);
|
||||
int x,y,k;
|
||||
for(k = 0; k < b.c; ++k){
|
||||
for(y = 0; y < b.h; ++y){
|
||||
for(x = 0; x < b.w; ++x){
|
||||
float val = get_pixel_extend(a, x - border, y - border, k);
|
||||
set_pixel(b, x, y, k, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
image tile_images(image a, image b, int dx)
|
||||
{
|
||||
if(a.w == 0) return copy_image(b);
|
||||
image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? a.c : b.c);
|
||||
fill_cpu(c.w*c.h*c.c, 1, c.data, 1);
|
||||
embed_image(a, c, 0, 0);
|
||||
composite_image(b, c, a.w + dx, 0);
|
||||
return c;
|
||||
}
|
||||
|
||||
image get_label(image *characters, char *string)
|
||||
{
|
||||
image label = make_empty_image(0,0,0);
|
||||
while(*string){
|
||||
image l = characters[(int)*string];
|
||||
image n = tile_images(label, l, -4);
|
||||
free_image(label);
|
||||
label = n;
|
||||
++string;
|
||||
}
|
||||
image b = border_image(label, label.h*.25);
|
||||
free_image(label);
|
||||
return b;
|
||||
}
|
||||
|
||||
void draw_label(image a, int r, int c, image label, const float *rgb)
|
||||
{
|
||||
float ratio = (float) label.w / label.h;
|
||||
int h = label.h;
|
||||
int h = a.h * .04;
|
||||
h = label.h;
|
||||
h = a.h * .06;
|
||||
int w = ratio * h;
|
||||
image rl = resize_image(label, w, h);
|
||||
if (r - h >= 0) r = r - h;
|
||||
@ -102,7 +164,19 @@ void draw_bbox(image a, box bbox, int w, float r, float g, float b)
|
||||
}
|
||||
}
|
||||
|
||||
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes)
|
||||
image *load_alphabet()
|
||||
{
|
||||
int i;
|
||||
image *alphabet = calloc(128, sizeof(image));
|
||||
for(i = 32; i < 127; ++i){
|
||||
char buff[256];
|
||||
sprintf(buff, "data/labels/%d.png", i);
|
||||
alphabet[i] = load_image_color(buff, 0, 0);
|
||||
}
|
||||
return alphabet;
|
||||
}
|
||||
|
||||
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *alphabet, int classes)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -111,7 +185,7 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs,
|
||||
float prob = probs[i][class];
|
||||
if(prob > thresh){
|
||||
//int width = pow(prob, 1./2.)*30+1;
|
||||
int width = 8;
|
||||
int width = im.h * .012;
|
||||
printf("%s: %.0f%%\n", names[class], prob*100);
|
||||
int offset = class*1 % classes;
|
||||
float red = get_color(2,offset,classes);
|
||||
@ -137,7 +211,10 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs,
|
||||
if(bot > im.h-1) bot = im.h-1;
|
||||
|
||||
draw_box_width(im, left, top, right, bot, width, red, green, blue);
|
||||
if (labels) draw_label(im, top + width, left, labels[class], rgb);
|
||||
if (alphabet) {
|
||||
image label = get_label(alphabet, names[class]);
|
||||
draw_label(im, top + width, left, label, rgb);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -368,6 +445,53 @@ void show_image(image p, const char *name)
|
||||
}
|
||||
|
||||
#ifdef OPENCV
|
||||
|
||||
image ipl_to_image(IplImage* src)
|
||||
{
|
||||
unsigned char *data = (unsigned char *)src->imageData;
|
||||
int h = src->height;
|
||||
int w = src->width;
|
||||
int c = src->nChannels;
|
||||
int step = src->widthStep;
|
||||
image out = make_image(w, h, c);
|
||||
int i, j, k, count=0;;
|
||||
|
||||
for(k= 0; k < c; ++k){
|
||||
for(i = 0; i < h; ++i){
|
||||
for(j = 0; j < w; ++j){
|
||||
out.data[count++] = data[i*step + j*c + k]/255.;
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
image load_image_cv(char *filename, int channels)
|
||||
{
|
||||
IplImage* src = 0;
|
||||
int flag = -1;
|
||||
if (channels == 0) flag = -1;
|
||||
else if (channels == 1) flag = 0;
|
||||
else if (channels == 3) flag = 1;
|
||||
else {
|
||||
fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
|
||||
}
|
||||
|
||||
if( (src = cvLoadImage(filename, flag)) == 0 )
|
||||
{
|
||||
fprintf(stderr, "Cannot load image \"%s\"\n", filename);
|
||||
char buff[256];
|
||||
sprintf(buff, "echo %s >> bad.list", filename);
|
||||
system(buff);
|
||||
return make_image(10,10,3);
|
||||
//exit(0);
|
||||
}
|
||||
image out = ipl_to_image(src);
|
||||
cvReleaseImage(&src);
|
||||
rgbgr_image(out);
|
||||
return out;
|
||||
}
|
||||
|
||||
image get_image_from_stream(CvCapture *cap)
|
||||
{
|
||||
IplImage* src = cvQueryFrame(cap);
|
||||
@ -376,9 +500,7 @@ image get_image_from_stream(CvCapture *cap)
|
||||
rgbgr_image(im);
|
||||
return im;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV
|
||||
void save_image_jpg(image p, const char *name)
|
||||
{
|
||||
image copy = copy_image(p);
|
||||
@ -980,7 +1102,7 @@ void test_resize(char *filename)
|
||||
image aug = random_augment_image(im, 0, 320, 448, 320, .75);
|
||||
show_image(aug, "aug");
|
||||
free_image(aug);
|
||||
|
||||
|
||||
|
||||
float exposure = 1.15;
|
||||
float saturation = 1.15;
|
||||
@ -1001,55 +1123,6 @@ void test_resize(char *filename)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef OPENCV
|
||||
image ipl_to_image(IplImage* src)
|
||||
{
|
||||
unsigned char *data = (unsigned char *)src->imageData;
|
||||
int h = src->height;
|
||||
int w = src->width;
|
||||
int c = src->nChannels;
|
||||
int step = src->widthStep;
|
||||
image out = make_image(w, h, c);
|
||||
int i, j, k, count=0;;
|
||||
|
||||
for(k= 0; k < c; ++k){
|
||||
for(i = 0; i < h; ++i){
|
||||
for(j = 0; j < w; ++j){
|
||||
out.data[count++] = data[i*step + j*c + k]/255.;
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
image load_image_cv(char *filename, int channels)
|
||||
{
|
||||
IplImage* src = 0;
|
||||
int flag = -1;
|
||||
if (channels == 0) flag = -1;
|
||||
else if (channels == 1) flag = 0;
|
||||
else if (channels == 3) flag = 1;
|
||||
else {
|
||||
fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
|
||||
}
|
||||
|
||||
if( (src = cvLoadImage(filename, flag)) == 0 )
|
||||
{
|
||||
fprintf(stderr, "Cannot load image \"%s\"\n", filename);
|
||||
char buff[256];
|
||||
sprintf(buff, "echo %s >> bad.list", filename);
|
||||
system(buff);
|
||||
return make_image(10,10,3);
|
||||
//exit(0);
|
||||
}
|
||||
image out = ipl_to_image(src);
|
||||
cvReleaseImage(&src);
|
||||
rgbgr_image(out);
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
image load_image_stb(char *filename, int channels)
|
||||
{
|
||||
@ -1122,6 +1195,7 @@ float get_pixel_extend(image m, int x, int y, int c)
|
||||
}
|
||||
void set_pixel(image m, int x, int y, int c, float val)
|
||||
{
|
||||
if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return;
|
||||
assert(x < m.w && y < m.h && c < m.c);
|
||||
m.data[c*m.h*m.w + y*m.w + x] = val;
|
||||
}
|
||||
@ -1247,5 +1321,7 @@ void show_images(image *ims, int n, char *window)
|
||||
|
||||
void free_image(image m)
|
||||
{
|
||||
free(m.data);
|
||||
if(m.data){
|
||||
free(m.data);
|
||||
}
|
||||
}
|
||||
|
13
src/image.h
13
src/image.h
@ -8,11 +8,6 @@
|
||||
#include <math.h>
|
||||
#include "box.h"
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#include "opencv2/imgproc/imgproc_c.h"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int h;
|
||||
int w;
|
||||
@ -26,6 +21,7 @@ void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b
|
||||
void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
|
||||
void draw_bbox(image a, box bbox, int w, float r, float g, float b);
|
||||
void draw_label(image a, int r, int c, image label, const float *rgb);
|
||||
void write_label(image a, int r, int c, image *characters, char *string, float *rgb);
|
||||
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes);
|
||||
image image_distance(image a, image b);
|
||||
void scale_image(image m, float s);
|
||||
@ -64,12 +60,6 @@ void show_images(image *ims, int n, char *window);
|
||||
void show_image_layers(image p, char *name);
|
||||
void show_image_collapsed(image p, char *name);
|
||||
|
||||
#ifdef OPENCV
|
||||
void save_image_jpg(image p, const char *name);
|
||||
image get_image_from_stream(CvCapture *cap);
|
||||
image ipl_to_image(IplImage* src);
|
||||
#endif
|
||||
|
||||
void print_image(image m);
|
||||
|
||||
image make_image(int w, int h, int c);
|
||||
@ -79,6 +69,7 @@ image float_to_image(int w, int h, int c, float *data);
|
||||
image copy_image(image p);
|
||||
image load_image(char *filename, int w, int h, int c);
|
||||
image load_image_color(char *filename, int w, int h);
|
||||
image *load_alphabet();
|
||||
|
||||
float get_pixel(image m, int x, int y, int c);
|
||||
float get_pixel_extend(image m, int x, int y, int c);
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include "activations.h"
|
||||
#include "stddef.h"
|
||||
|
||||
struct network_state;
|
||||
|
||||
struct layer;
|
||||
typedef struct layer layer;
|
||||
|
||||
@ -42,6 +44,12 @@ struct layer{
|
||||
LAYER_TYPE type;
|
||||
ACTIVATION activation;
|
||||
COST_TYPE cost_type;
|
||||
void (*forward) (struct layer, struct network_state);
|
||||
void (*backward) (struct layer, struct network_state);
|
||||
void (*update) (struct layer, int, float, float, float);
|
||||
void (*forward_gpu) (struct layer, struct network_state);
|
||||
void (*backward_gpu) (struct layer, struct network_state);
|
||||
void (*update_gpu) (struct layer, int, float, float, float);
|
||||
int batch_normalize;
|
||||
int shortcut;
|
||||
int batch;
|
||||
|
@ -60,8 +60,16 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
|
||||
l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
|
||||
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
|
||||
l.forward = forward_local_layer;
|
||||
l.backward = backward_local_layer;
|
||||
l.update = update_local_layer;
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_local_layer_gpu;
|
||||
l.backward_gpu = backward_local_layer_gpu;
|
||||
l.update_gpu = update_local_layer_gpu;
|
||||
|
||||
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations);
|
||||
|
||||
|
@ -39,7 +39,11 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
|
||||
l.indexes = calloc(output_size, sizeof(int));
|
||||
l.output = calloc(output_size, sizeof(float));
|
||||
l.delta = calloc(output_size, sizeof(float));
|
||||
l.forward = forward_maxpool_layer;
|
||||
l.backward = backward_maxpool_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_maxpool_layer_gpu;
|
||||
l.backward_gpu = backward_maxpool_layer_gpu;
|
||||
l.indexes_gpu = cuda_make_int_array(output_size);
|
||||
l.output_gpu = cuda_make_array(l.output, output_size);
|
||||
l.delta_gpu = cuda_make_array(l.delta, output_size);
|
||||
|
113
src/network.c
113
src/network.c
@ -15,7 +15,6 @@
|
||||
#include "local_layer.h"
|
||||
#include "convolutional_layer.h"
|
||||
#include "activation_layer.h"
|
||||
#include "deconvolutional_layer.h"
|
||||
#include "detection_layer.h"
|
||||
#include "region_layer.h"
|
||||
#include "normalization_layer.h"
|
||||
@ -153,49 +152,7 @@ void forward_network(network net, network_state state)
|
||||
if(l.delta){
|
||||
scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
|
||||
}
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
forward_convolutional_layer(l, state);
|
||||
} else if(l.type == DECONVOLUTIONAL){
|
||||
forward_deconvolutional_layer(l, state);
|
||||
} else if(l.type == ACTIVE){
|
||||
forward_activation_layer(l, state);
|
||||
} else if(l.type == LOCAL){
|
||||
forward_local_layer(l, state);
|
||||
} else if(l.type == NORMALIZATION){
|
||||
forward_normalization_layer(l, state);
|
||||
} else if(l.type == BATCHNORM){
|
||||
forward_batchnorm_layer(l, state);
|
||||
} else if(l.type == DETECTION){
|
||||
forward_detection_layer(l, state);
|
||||
} else if(l.type == REGION){
|
||||
forward_region_layer(l, state);
|
||||
} else if(l.type == CONNECTED){
|
||||
forward_connected_layer(l, state);
|
||||
} else if(l.type == RNN){
|
||||
forward_rnn_layer(l, state);
|
||||
} else if(l.type == GRU){
|
||||
forward_gru_layer(l, state);
|
||||
} else if(l.type == CRNN){
|
||||
forward_crnn_layer(l, state);
|
||||
} else if(l.type == CROP){
|
||||
forward_crop_layer(l, state);
|
||||
} else if(l.type == COST){
|
||||
forward_cost_layer(l, state);
|
||||
} else if(l.type == SOFTMAX){
|
||||
forward_softmax_layer(l, state);
|
||||
} else if(l.type == MAXPOOL){
|
||||
forward_maxpool_layer(l, state);
|
||||
} else if(l.type == REORG){
|
||||
forward_reorg_layer(l, state);
|
||||
} else if(l.type == AVGPOOL){
|
||||
forward_avgpool_layer(l, state);
|
||||
} else if(l.type == DROPOUT){
|
||||
forward_dropout_layer(l, state);
|
||||
} else if(l.type == ROUTE){
|
||||
forward_route_layer(l, net);
|
||||
} else if(l.type == SHORTCUT){
|
||||
forward_shortcut_layer(l, state);
|
||||
}
|
||||
l.forward(l, state);
|
||||
state.input = l.output;
|
||||
}
|
||||
}
|
||||
@ -207,29 +164,17 @@ void update_network(network net)
|
||||
float rate = get_current_rate(net);
|
||||
for(i = 0; i < net.n; ++i){
|
||||
layer l = net.layers[i];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
update_convolutional_layer(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == DECONVOLUTIONAL){
|
||||
update_deconvolutional_layer(l, rate, net.momentum, net.decay);
|
||||
} else if(l.type == CONNECTED){
|
||||
update_connected_layer(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == RNN){
|
||||
update_rnn_layer(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == GRU){
|
||||
update_gru_layer(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == CRNN){
|
||||
update_crnn_layer(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == LOCAL){
|
||||
update_local_layer(l, update_batch, rate, net.momentum, net.decay);
|
||||
if(l.update){
|
||||
l.update(l, update_batch, rate, net.momentum, net.decay);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float *get_network_output(network net)
|
||||
{
|
||||
#ifdef GPU
|
||||
if (gpu_index >= 0) return get_network_output_gpu(net);
|
||||
#endif
|
||||
#ifdef GPU
|
||||
if (gpu_index >= 0) return get_network_output_gpu(net);
|
||||
#endif
|
||||
int i;
|
||||
for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break;
|
||||
return net.layers[i].output;
|
||||
@ -273,47 +218,7 @@ void backward_network(network net, network_state state)
|
||||
state.delta = prev.delta;
|
||||
}
|
||||
layer l = net.layers[i];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
backward_convolutional_layer(l, state);
|
||||
} else if(l.type == DECONVOLUTIONAL){
|
||||
backward_deconvolutional_layer(l, state);
|
||||
} else if(l.type == ACTIVE){
|
||||
backward_activation_layer(l, state);
|
||||
} else if(l.type == NORMALIZATION){
|
||||
backward_normalization_layer(l, state);
|
||||
} else if(l.type == BATCHNORM){
|
||||
backward_batchnorm_layer(l, state);
|
||||
} else if(l.type == MAXPOOL){
|
||||
if(i != 0) backward_maxpool_layer(l, state);
|
||||
} else if(l.type == REORG){
|
||||
backward_reorg_layer(l, state);
|
||||
} else if(l.type == AVGPOOL){
|
||||
backward_avgpool_layer(l, state);
|
||||
} else if(l.type == DROPOUT){
|
||||
backward_dropout_layer(l, state);
|
||||
} else if(l.type == DETECTION){
|
||||
backward_detection_layer(l, state);
|
||||
} else if(l.type == REGION){
|
||||
backward_region_layer(l, state);
|
||||
} else if(l.type == SOFTMAX){
|
||||
if(i != 0) backward_softmax_layer(l, state);
|
||||
} else if(l.type == CONNECTED){
|
||||
backward_connected_layer(l, state);
|
||||
} else if(l.type == RNN){
|
||||
backward_rnn_layer(l, state);
|
||||
} else if(l.type == GRU){
|
||||
backward_gru_layer(l, state);
|
||||
} else if(l.type == CRNN){
|
||||
backward_crnn_layer(l, state);
|
||||
} else if(l.type == LOCAL){
|
||||
backward_local_layer(l, state);
|
||||
} else if(l.type == COST){
|
||||
backward_cost_layer(l, state);
|
||||
} else if(l.type == ROUTE){
|
||||
backward_route_layer(l, net);
|
||||
} else if(l.type == SHORTCUT){
|
||||
backward_shortcut_layer(l, state);
|
||||
}
|
||||
l.backward(l, state);
|
||||
}
|
||||
}
|
||||
|
||||
@ -406,11 +311,11 @@ void set_batch_network(network *net, int b)
|
||||
int i;
|
||||
for(i = 0; i < net->n; ++i){
|
||||
net->layers[i].batch = b;
|
||||
#ifdef CUDNN
|
||||
#ifdef CUDNN
|
||||
if(net->layers[i].type == CONVOLUTIONAL){
|
||||
cudnn_convolutional_setup(net->layers + i);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,6 @@ extern "C" {
|
||||
#include "region_layer.h"
|
||||
#include "convolutional_layer.h"
|
||||
#include "activation_layer.h"
|
||||
#include "deconvolutional_layer.h"
|
||||
#include "maxpool_layer.h"
|
||||
#include "reorg_layer.h"
|
||||
#include "avgpool_layer.h"
|
||||
@ -51,49 +50,7 @@ void forward_network_gpu(network net, network_state state)
|
||||
if(l.delta_gpu){
|
||||
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
|
||||
}
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
forward_convolutional_layer_gpu(l, state);
|
||||
} else if(l.type == DECONVOLUTIONAL){
|
||||
forward_deconvolutional_layer_gpu(l, state);
|
||||
} else if(l.type == ACTIVE){
|
||||
forward_activation_layer_gpu(l, state);
|
||||
} else if(l.type == LOCAL){
|
||||
forward_local_layer_gpu(l, state);
|
||||
} else if(l.type == DETECTION){
|
||||
forward_detection_layer_gpu(l, state);
|
||||
} else if(l.type == REGION){
|
||||
forward_region_layer_gpu(l, state);
|
||||
} else if(l.type == CONNECTED){
|
||||
forward_connected_layer_gpu(l, state);
|
||||
} else if(l.type == RNN){
|
||||
forward_rnn_layer_gpu(l, state);
|
||||
} else if(l.type == GRU){
|
||||
forward_gru_layer_gpu(l, state);
|
||||
} else if(l.type == CRNN){
|
||||
forward_crnn_layer_gpu(l, state);
|
||||
} else if(l.type == CROP){
|
||||
forward_crop_layer_gpu(l, state);
|
||||
} else if(l.type == COST){
|
||||
forward_cost_layer_gpu(l, state);
|
||||
} else if(l.type == SOFTMAX){
|
||||
forward_softmax_layer_gpu(l, state);
|
||||
} else if(l.type == NORMALIZATION){
|
||||
forward_normalization_layer_gpu(l, state);
|
||||
} else if(l.type == BATCHNORM){
|
||||
forward_batchnorm_layer_gpu(l, state);
|
||||
} else if(l.type == MAXPOOL){
|
||||
forward_maxpool_layer_gpu(l, state);
|
||||
} else if(l.type == REORG){
|
||||
forward_reorg_layer_gpu(l, state);
|
||||
} else if(l.type == AVGPOOL){
|
||||
forward_avgpool_layer_gpu(l, state);
|
||||
} else if(l.type == DROPOUT){
|
||||
forward_dropout_layer_gpu(l, state);
|
||||
} else if(l.type == ROUTE){
|
||||
forward_route_layer_gpu(l, net);
|
||||
} else if(l.type == SHORTCUT){
|
||||
forward_shortcut_layer_gpu(l, state);
|
||||
}
|
||||
l.forward_gpu(l, state);
|
||||
state.input = l.output_gpu;
|
||||
}
|
||||
}
|
||||
@ -115,47 +72,7 @@ void backward_network_gpu(network net, network_state state)
|
||||
state.input = prev.output_gpu;
|
||||
state.delta = prev.delta_gpu;
|
||||
}
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
backward_convolutional_layer_gpu(l, state);
|
||||
} else if(l.type == DECONVOLUTIONAL){
|
||||
backward_deconvolutional_layer_gpu(l, state);
|
||||
} else if(l.type == ACTIVE){
|
||||
backward_activation_layer_gpu(l, state);
|
||||
} else if(l.type == LOCAL){
|
||||
backward_local_layer_gpu(l, state);
|
||||
} else if(l.type == MAXPOOL){
|
||||
if(i != 0) backward_maxpool_layer_gpu(l, state);
|
||||
} else if(l.type == REORG){
|
||||
backward_reorg_layer_gpu(l, state);
|
||||
} else if(l.type == AVGPOOL){
|
||||
if(i != 0) backward_avgpool_layer_gpu(l, state);
|
||||
} else if(l.type == DROPOUT){
|
||||
backward_dropout_layer_gpu(l, state);
|
||||
} else if(l.type == DETECTION){
|
||||
backward_detection_layer_gpu(l, state);
|
||||
} else if(l.type == REGION){
|
||||
backward_region_layer_gpu(l, state);
|
||||
} else if(l.type == NORMALIZATION){
|
||||
backward_normalization_layer_gpu(l, state);
|
||||
} else if(l.type == BATCHNORM){
|
||||
backward_batchnorm_layer_gpu(l, state);
|
||||
} else if(l.type == SOFTMAX){
|
||||
if(i != 0) backward_softmax_layer_gpu(l, state);
|
||||
} else if(l.type == CONNECTED){
|
||||
backward_connected_layer_gpu(l, state);
|
||||
} else if(l.type == RNN){
|
||||
backward_rnn_layer_gpu(l, state);
|
||||
} else if(l.type == GRU){
|
||||
backward_gru_layer_gpu(l, state);
|
||||
} else if(l.type == CRNN){
|
||||
backward_crnn_layer_gpu(l, state);
|
||||
} else if(l.type == COST){
|
||||
backward_cost_layer_gpu(l, state);
|
||||
} else if(l.type == ROUTE){
|
||||
backward_route_layer_gpu(l, net);
|
||||
} else if(l.type == SHORTCUT){
|
||||
backward_shortcut_layer_gpu(l, state);
|
||||
}
|
||||
l.backward_gpu(l, state);
|
||||
}
|
||||
}
|
||||
|
||||
@ -166,20 +83,8 @@ void update_network_gpu(network net)
|
||||
float rate = get_current_rate(net);
|
||||
for(i = 0; i < net.n; ++i){
|
||||
layer l = net.layers[i];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == DECONVOLUTIONAL){
|
||||
update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay);
|
||||
} else if(l.type == CONNECTED){
|
||||
update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == GRU){
|
||||
update_gru_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == RNN){
|
||||
update_rnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == CRNN){
|
||||
update_crnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == LOCAL){
|
||||
update_local_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
if(l.update_gpu){
|
||||
l.update_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -271,20 +176,8 @@ void update_layer(layer l, network net)
|
||||
{
|
||||
int update_batch = net.batch*net.subdivisions;
|
||||
float rate = get_current_rate(net);
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == DECONVOLUTIONAL){
|
||||
update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay);
|
||||
} else if(l.type == CONNECTED){
|
||||
update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == RNN){
|
||||
update_rnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == GRU){
|
||||
update_gru_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == CRNN){
|
||||
update_crnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
} else if(l.type == LOCAL){
|
||||
update_local_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
if(l.update_gpu){
|
||||
l.update_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
}
|
||||
}
|
||||
|
||||
@ -463,7 +356,7 @@ float train_networks(network *nets, int n, data d, int interval)
|
||||
}
|
||||
for(i = 0; i < n; ++i){
|
||||
pthread_join(threads[i], 0);
|
||||
printf("%f\n", errors[i]);
|
||||
//printf("%f\n", errors[i]);
|
||||
sum += errors[i];
|
||||
}
|
||||
if (get_current_batch(nets[0]) % interval == 0) {
|
||||
@ -492,6 +385,7 @@ float *get_network_output_gpu(network net)
|
||||
|
||||
float *network_predict_gpu(network net, float *input)
|
||||
{
|
||||
cuda_set_device(net.gpu_index);
|
||||
int size = get_network_input_size(net) * net.batch;
|
||||
network_state state;
|
||||
state.index = 0;
|
||||
|
@ -21,7 +21,13 @@ layer make_normalization_layer(int batch, int w, int h, int c, int size, float a
|
||||
layer.norms = calloc(h * w * c * batch, sizeof(float));
|
||||
layer.inputs = w*h*c;
|
||||
layer.outputs = layer.inputs;
|
||||
|
||||
layer.forward = forward_normalization_layer;
|
||||
layer.backward = backward_normalization_layer;
|
||||
#ifdef GPU
|
||||
layer.forward_gpu = forward_normalization_layer_gpu;
|
||||
layer.backward_gpu = backward_normalization_layer_gpu;
|
||||
|
||||
layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
|
||||
layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
|
||||
layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);
|
||||
|
311
src/parser.c
311
src/parser.c
@ -12,7 +12,6 @@
|
||||
#include "activation_layer.h"
|
||||
#include "normalization_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "deconvolutional_layer.h"
|
||||
#include "connected_layer.h"
|
||||
#include "rnn_layer.h"
|
||||
#include "gru_layer.h"
|
||||
@ -36,30 +35,42 @@ typedef struct{
|
||||
list *options;
|
||||
}section;
|
||||
|
||||
int is_network(section *s);
|
||||
int is_convolutional(section *s);
|
||||
int is_activation(section *s);
|
||||
int is_local(section *s);
|
||||
int is_deconvolutional(section *s);
|
||||
int is_connected(section *s);
|
||||
int is_rnn(section *s);
|
||||
int is_gru(section *s);
|
||||
int is_crnn(section *s);
|
||||
int is_maxpool(section *s);
|
||||
int is_reorg(section *s);
|
||||
int is_avgpool(section *s);
|
||||
int is_dropout(section *s);
|
||||
int is_softmax(section *s);
|
||||
int is_normalization(section *s);
|
||||
int is_batchnorm(section *s);
|
||||
int is_crop(section *s);
|
||||
int is_shortcut(section *s);
|
||||
int is_cost(section *s);
|
||||
int is_detection(section *s);
|
||||
int is_region(section *s);
|
||||
int is_route(section *s);
|
||||
list *read_cfg(char *filename);
|
||||
|
||||
LAYER_TYPE string_to_layer_type(char * type)
|
||||
{
|
||||
|
||||
if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
|
||||
if (strcmp(type, "[crop]")==0) return CROP;
|
||||
if (strcmp(type, "[cost]")==0) return COST;
|
||||
if (strcmp(type, "[detection]")==0) return DETECTION;
|
||||
if (strcmp(type, "[region]")==0) return REGION;
|
||||
if (strcmp(type, "[local]")==0) return LOCAL;
|
||||
if (strcmp(type, "[conv]")==0
|
||||
|| strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
|
||||
if (strcmp(type, "[activation]")==0) return ACTIVE;
|
||||
if (strcmp(type, "[net]")==0
|
||||
|| strcmp(type, "[network]")==0) return NETWORK;
|
||||
if (strcmp(type, "[crnn]")==0) return CRNN;
|
||||
if (strcmp(type, "[gru]")==0) return GRU;
|
||||
if (strcmp(type, "[rnn]")==0) return RNN;
|
||||
if (strcmp(type, "[conn]")==0
|
||||
|| strcmp(type, "[connected]")==0) return CONNECTED;
|
||||
if (strcmp(type, "[max]")==0
|
||||
|| strcmp(type, "[maxpool]")==0) return MAXPOOL;
|
||||
if (strcmp(type, "[reorg]")==0) return REORG;
|
||||
if (strcmp(type, "[avg]")==0
|
||||
|| strcmp(type, "[avgpool]")==0) return AVGPOOL;
|
||||
if (strcmp(type, "[dropout]")==0) return DROPOUT;
|
||||
if (strcmp(type, "[lrn]")==0
|
||||
|| strcmp(type, "[normalization]")==0) return NORMALIZATION;
|
||||
if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
|
||||
if (strcmp(type, "[soft]")==0
|
||||
|| strcmp(type, "[softmax]")==0) return SOFTMAX;
|
||||
if (strcmp(type, "[route]")==0) return ROUTE;
|
||||
return BLANK;
|
||||
}
|
||||
|
||||
void free_section(section *s)
|
||||
{
|
||||
free(s->type);
|
||||
@ -102,26 +113,6 @@ typedef struct size_params{
|
||||
int time_steps;
|
||||
} size_params;
|
||||
|
||||
deconvolutional_layer parse_deconvolutional(list *options, size_params params)
|
||||
{
|
||||
int n = option_find_int(options, "filters",1);
|
||||
int size = option_find_int(options, "size",1);
|
||||
int stride = option_find_int(options, "stride",1);
|
||||
char *activation_s = option_find_str(options, "activation", "logistic");
|
||||
ACTIVATION activation = get_activation(activation_s);
|
||||
|
||||
int batch,h,w,c;
|
||||
h = params.h;
|
||||
w = params.w;
|
||||
c = params.c;
|
||||
batch=params.batch;
|
||||
if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
|
||||
|
||||
deconvolutional_layer layer = make_deconvolutional_layer(batch,h,w,c,n,size,stride,activation);
|
||||
|
||||
return layer;
|
||||
}
|
||||
|
||||
local_layer parse_local(list *options, size_params params)
|
||||
{
|
||||
int n = option_find_int(options, "filters",1);
|
||||
@ -545,6 +536,12 @@ void parse_net_options(list *options, network *net)
|
||||
net->max_batches = option_find_int(options, "max_batches", 0);
|
||||
}
|
||||
|
||||
int is_network(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[net]")==0
|
||||
|| strcmp(s->type, "[network]")==0);
|
||||
}
|
||||
|
||||
network parse_network_cfg(char *filename)
|
||||
{
|
||||
list *sections = read_cfg(filename);
|
||||
@ -576,47 +573,46 @@ network parse_network_cfg(char *filename)
|
||||
s = (section *)n->val;
|
||||
options = s->options;
|
||||
layer l = {0};
|
||||
if(is_convolutional(s)){
|
||||
LAYER_TYPE lt = string_to_layer_type(s->type);
|
||||
if(lt == CONVOLUTIONAL){
|
||||
l = parse_convolutional(options, params);
|
||||
}else if(is_local(s)){
|
||||
}else if(lt == LOCAL){
|
||||
l = parse_local(options, params);
|
||||
}else if(is_activation(s)){
|
||||
}else if(lt == ACTIVE){
|
||||
l = parse_activation(options, params);
|
||||
}else if(is_deconvolutional(s)){
|
||||
l = parse_deconvolutional(options, params);
|
||||
}else if(is_rnn(s)){
|
||||
}else if(lt == RNN){
|
||||
l = parse_rnn(options, params);
|
||||
}else if(is_gru(s)){
|
||||
}else if(lt == GRU){
|
||||
l = parse_gru(options, params);
|
||||
}else if(is_crnn(s)){
|
||||
}else if(lt == CRNN){
|
||||
l = parse_crnn(options, params);
|
||||
}else if(is_connected(s)){
|
||||
}else if(lt == CONNECTED){
|
||||
l = parse_connected(options, params);
|
||||
}else if(is_crop(s)){
|
||||
}else if(lt == CROP){
|
||||
l = parse_crop(options, params);
|
||||
}else if(is_cost(s)){
|
||||
}else if(lt == COST){
|
||||
l = parse_cost(options, params);
|
||||
}else if(is_region(s)){
|
||||
}else if(lt == REGION){
|
||||
l = parse_region(options, params);
|
||||
}else if(is_detection(s)){
|
||||
}else if(lt == DETECTION){
|
||||
l = parse_detection(options, params);
|
||||
}else if(is_softmax(s)){
|
||||
}else if(lt == SOFTMAX){
|
||||
l = parse_softmax(options, params);
|
||||
}else if(is_normalization(s)){
|
||||
}else if(lt == NORMALIZATION){
|
||||
l = parse_normalization(options, params);
|
||||
}else if(is_batchnorm(s)){
|
||||
}else if(lt == BATCHNORM){
|
||||
l = parse_batchnorm(options, params);
|
||||
}else if(is_maxpool(s)){
|
||||
}else if(lt == MAXPOOL){
|
||||
l = parse_maxpool(options, params);
|
||||
}else if(is_reorg(s)){
|
||||
}else if(lt == REORG){
|
||||
l = parse_reorg(options, params);
|
||||
}else if(is_avgpool(s)){
|
||||
}else if(lt == AVGPOOL){
|
||||
l = parse_avgpool(options, params);
|
||||
}else if(is_route(s)){
|
||||
}else if(lt == ROUTE){
|
||||
l = parse_route(options, params, net);
|
||||
}else if(is_shortcut(s)){
|
||||
}else if(lt == SHORTCUT){
|
||||
l = parse_shortcut(options, params, net);
|
||||
}else if(is_dropout(s)){
|
||||
}else if(lt == DROPOUT){
|
||||
l = parse_dropout(options, params);
|
||||
l.output = net.layers[count-1].output;
|
||||
l.delta = net.layers[count-1].delta;
|
||||
@ -660,142 +656,6 @@ network parse_network_cfg(char *filename)
|
||||
return net;
|
||||
}
|
||||
|
||||
LAYER_TYPE string_to_layer_type(char * type)
|
||||
{
|
||||
|
||||
if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
|
||||
if (strcmp(type, "[crop]")==0) return CROP;
|
||||
if (strcmp(type, "[cost]")==0) return COST;
|
||||
if (strcmp(type, "[detection]")==0) return DETECTION;
|
||||
if (strcmp(type, "[region]")==0) return REGION;
|
||||
if (strcmp(type, "[local]")==0) return LOCAL;
|
||||
if (strcmp(type, "[deconv]")==0
|
||||
|| strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
|
||||
if (strcmp(type, "[conv]")==0
|
||||
|| strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
|
||||
if (strcmp(type, "[activation]")==0) return ACTIVE;
|
||||
if (strcmp(type, "[net]")==0
|
||||
|| strcmp(type, "[network]")==0) return NETWORK;
|
||||
if (strcmp(type, "[crnn]")==0) return CRNN;
|
||||
if (strcmp(type, "[gru]")==0) return GRU;
|
||||
if (strcmp(type, "[rnn]")==0) return RNN;
|
||||
if (strcmp(type, "[conn]")==0
|
||||
|| strcmp(type, "[connected]")==0) return CONNECTED;
|
||||
if (strcmp(type, "[max]")==0
|
||||
|| strcmp(type, "[maxpool]")==0) return MAXPOOL;
|
||||
if (strcmp(type, "[reorg]")==0) return REORG;
|
||||
if (strcmp(type, "[avg]")==0
|
||||
|| strcmp(type, "[avgpool]")==0) return AVGPOOL;
|
||||
if (strcmp(type, "[dropout]")==0) return DROPOUT;
|
||||
if (strcmp(type, "[lrn]")==0
|
||||
|| strcmp(type, "[normalization]")==0) return NORMALIZATION;
|
||||
if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
|
||||
if (strcmp(type, "[soft]")==0
|
||||
|| strcmp(type, "[softmax]")==0) return SOFTMAX;
|
||||
if (strcmp(type, "[route]")==0) return ROUTE;
|
||||
return BLANK;
|
||||
}
|
||||
|
||||
int is_shortcut(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[shortcut]")==0);
|
||||
}
|
||||
int is_crop(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[crop]")==0);
|
||||
}
|
||||
int is_cost(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[cost]")==0);
|
||||
}
|
||||
int is_region(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[region]")==0);
|
||||
}
|
||||
int is_detection(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[detection]")==0);
|
||||
}
|
||||
int is_local(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[local]")==0);
|
||||
}
|
||||
int is_deconvolutional(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[deconv]")==0
|
||||
|| strcmp(s->type, "[deconvolutional]")==0);
|
||||
}
|
||||
int is_convolutional(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[conv]")==0
|
||||
|| strcmp(s->type, "[convolutional]")==0);
|
||||
}
|
||||
int is_activation(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[activation]")==0);
|
||||
}
|
||||
int is_network(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[net]")==0
|
||||
|| strcmp(s->type, "[network]")==0);
|
||||
}
|
||||
int is_crnn(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[crnn]")==0);
|
||||
}
|
||||
int is_gru(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[gru]")==0);
|
||||
}
|
||||
int is_rnn(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[rnn]")==0);
|
||||
}
|
||||
int is_connected(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[conn]")==0
|
||||
|| strcmp(s->type, "[connected]")==0);
|
||||
}
|
||||
int is_reorg(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[reorg]")==0);
|
||||
}
|
||||
int is_maxpool(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[max]")==0
|
||||
|| strcmp(s->type, "[maxpool]")==0);
|
||||
}
|
||||
int is_avgpool(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[avg]")==0
|
||||
|| strcmp(s->type, "[avgpool]")==0);
|
||||
}
|
||||
int is_dropout(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[dropout]")==0);
|
||||
}
|
||||
|
||||
int is_normalization(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[lrn]")==0
|
||||
|| strcmp(s->type, "[normalization]")==0);
|
||||
}
|
||||
|
||||
int is_batchnorm(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[batchnorm]")==0);
|
||||
}
|
||||
|
||||
int is_softmax(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[soft]")==0
|
||||
|| strcmp(s->type, "[softmax]")==0);
|
||||
}
|
||||
int is_route(section *s)
|
||||
{
|
||||
return (strcmp(s->type, "[route]")==0);
|
||||
}
|
||||
|
||||
list *read_cfg(char *filename)
|
||||
{
|
||||
FILE *file = fopen(filename, "r");
|
||||
@ -831,45 +691,6 @@ list *read_cfg(char *filename)
|
||||
return sections;
|
||||
}
|
||||
|
||||
void save_weights_double(network net, char *filename)
|
||||
{
|
||||
fprintf(stderr, "Saving doubled weights to %s\n", filename);
|
||||
FILE *fp = fopen(filename, "w");
|
||||
if(!fp) file_error(filename);
|
||||
|
||||
fwrite(&net.learning_rate, sizeof(float), 1, fp);
|
||||
fwrite(&net.momentum, sizeof(float), 1, fp);
|
||||
fwrite(&net.decay, sizeof(float), 1, fp);
|
||||
fwrite(net.seen, sizeof(int), 1, fp);
|
||||
|
||||
int i,j,k;
|
||||
for(i = 0; i < net.n; ++i){
|
||||
layer l = net.layers[i];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
#ifdef GPU
|
||||
if(gpu_index >= 0){
|
||||
pull_convolutional_layer(l);
|
||||
}
|
||||
#endif
|
||||
float zero = 0;
|
||||
fwrite(l.biases, sizeof(float), l.n, fp);
|
||||
fwrite(l.biases, sizeof(float), l.n, fp);
|
||||
|
||||
for (j = 0; j < l.n; ++j){
|
||||
int index = j*l.c*l.size*l.size;
|
||||
fwrite(l.weights+index, sizeof(float), l.c*l.size*l.size, fp);
|
||||
for (k = 0; k < l.c*l.size*l.size; ++k) fwrite(&zero, sizeof(float), 1, fp);
|
||||
}
|
||||
for (j = 0; j < l.n; ++j){
|
||||
int index = j*l.c*l.size*l.size;
|
||||
for (k = 0; k < l.c*l.size*l.size; ++k) fwrite(&zero, sizeof(float), 1, fp);
|
||||
fwrite(l.weights+index, sizeof(float), l.c*l.size*l.size, fp);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
void save_convolutional_weights_binary(layer l, FILE *fp)
|
||||
{
|
||||
#ifdef GPU
|
||||
@ -1147,16 +968,6 @@ void load_weights_upto(network *net, char *filename, int cutoff)
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
load_convolutional_weights(l, fp);
|
||||
}
|
||||
if(l.type == DECONVOLUTIONAL){
|
||||
int num = l.n*l.c*l.size*l.size;
|
||||
fread(l.biases, sizeof(float), l.n, fp);
|
||||
fread(l.weights, sizeof(float), num, fp);
|
||||
#ifdef GPU
|
||||
if(gpu_index >= 0){
|
||||
push_deconvolutional_layer(l);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if(l.type == CONNECTED){
|
||||
load_connected_weights(l, fp, transpose);
|
||||
}
|
||||
|
@ -34,7 +34,11 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int
|
||||
l.biases[i] = .5;
|
||||
}
|
||||
|
||||
l.forward = forward_region_layer;
|
||||
l.backward = backward_region_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_region_layer_gpu;
|
||||
l.backward_gpu = backward_region_layer_gpu;
|
||||
l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
|
||||
l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
|
||||
#endif
|
||||
@ -228,6 +232,45 @@ void backward_region_layer(const region_layer l, network_state state)
|
||||
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
|
||||
}
|
||||
|
||||
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
|
||||
{
|
||||
int i,j,n;
|
||||
float *predictions = l.output;
|
||||
//int per_cell = 5*num+classes;
|
||||
for (i = 0; i < l.w*l.h; ++i){
|
||||
int row = i / l.w;
|
||||
int col = i % l.w;
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = i*l.n + n;
|
||||
int p_index = index * (l.classes + 5) + 4;
|
||||
float scale = predictions[p_index];
|
||||
int box_index = index * (l.classes + 5);
|
||||
boxes[index].x = (predictions[box_index + 0] + col + .5) / l.w * w;
|
||||
boxes[index].y = (predictions[box_index + 1] + row + .5) / l.h * h;
|
||||
if(0){
|
||||
boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / l.w * w;
|
||||
boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / l.h * h;
|
||||
}
|
||||
boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (l.sqrt?2:1)) * w;
|
||||
boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (l.sqrt?2:1)) * h;
|
||||
if(1){
|
||||
boxes[index].x = ((col + .5)/l.w + predictions[box_index + 0] * .5) * w;
|
||||
boxes[index].y = ((row + .5)/l.h + predictions[box_index + 1] * .5) * h;
|
||||
boxes[index].w = (exp(predictions[box_index + 2]) * .5) * w;
|
||||
boxes[index].h = (exp(predictions[box_index + 3]) * .5) * h;
|
||||
}
|
||||
for(j = 0; j < l.classes; ++j){
|
||||
int class_index = index * (l.classes + 5) + 5;
|
||||
float prob = scale*predictions[class_index+j];
|
||||
probs[index][j] = (prob > thresh) ? prob : 0;
|
||||
}
|
||||
if(only_objectness){
|
||||
probs[index][0] = scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
void forward_region_layer_gpu(const region_layer l, network_state state)
|
||||
|
@ -9,6 +9,7 @@ typedef layer region_layer;
|
||||
region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
|
||||
void forward_region_layer(const region_layer l, network_state state);
|
||||
void backward_region_layer(const region_layer l, network_state state);
|
||||
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
||||
|
||||
#ifdef GPU
|
||||
void forward_region_layer_gpu(const region_layer l, network_state state);
|
||||
|
@ -22,7 +22,13 @@ layer make_reorg_layer(int batch, int h, int w, int c, int stride)
|
||||
int output_size = l.out_h * l.out_w * l.out_c * batch;
|
||||
l.output = calloc(output_size, sizeof(float));
|
||||
l.delta = calloc(output_size, sizeof(float));
|
||||
|
||||
l.forward = forward_reorg_layer;
|
||||
l.backward = backward_reorg_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_reorg_layer_gpu;
|
||||
l.backward_gpu = backward_reorg_layer_gpu;
|
||||
|
||||
l.output_gpu = cuda_make_array(l.output, output_size);
|
||||
l.delta_gpu = cuda_make_array(l.delta, output_size);
|
||||
#endif
|
||||
|
@ -58,7 +58,13 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
|
||||
l.output = l.output_layer->output;
|
||||
l.delta = l.output_layer->delta;
|
||||
|
||||
l.forward = forward_rnn_layer;
|
||||
l.backward = backward_rnn_layer;
|
||||
l.update = update_rnn_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_rnn_layer_gpu;
|
||||
l.backward_gpu = backward_rnn_layer_gpu;
|
||||
l.update_gpu = update_rnn_layer_gpu;
|
||||
l.state_gpu = cuda_make_array(l.state, batch*hidden*(steps+1));
|
||||
l.output_gpu = l.output_layer->output_gpu;
|
||||
l.delta_gpu = l.output_layer->delta_gpu;
|
||||
|
@ -1,23 +1,24 @@
|
||||
|
||||
#ifndef GRU_LAYER_H
|
||||
#define GRU_LAYER_H
|
||||
#ifndef RNN_LAYER_H
|
||||
#define RNN_LAYER_H
|
||||
|
||||
#include "activations.h"
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
#define USET
|
||||
|
||||
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
|
||||
layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);
|
||||
|
||||
void forward_gru_layer(layer l, network_state state);
|
||||
void backward_gru_layer(layer l, network_state state);
|
||||
void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
void forward_rnn_layer(layer l, network_state state);
|
||||
void backward_rnn_layer(layer l, network_state state);
|
||||
void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
|
||||
#ifdef GPU
|
||||
void forward_gru_layer_gpu(layer l, network_state state);
|
||||
void backward_gru_layer_gpu(layer l, network_state state);
|
||||
void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
void push_gru_layer(layer l);
|
||||
void pull_gru_layer(layer l);
|
||||
void forward_rnn_layer_gpu(layer l, network_state state);
|
||||
void backward_rnn_layer_gpu(layer l, network_state state);
|
||||
void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
void push_rnn_layer(layer l);
|
||||
void pull_rnn_layer(layer l);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -6,6 +6,8 @@
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
image get_image_from_stream(CvCapture *cap);
|
||||
image ipl_to_image(IplImage* src);
|
||||
|
||||
void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters);
|
||||
|
||||
|
@ -23,20 +23,26 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz
|
||||
l.inputs = outputs;
|
||||
l.delta = calloc(outputs*batch, sizeof(float));
|
||||
l.output = calloc(outputs*batch, sizeof(float));;
|
||||
|
||||
l.forward = forward_route_layer;
|
||||
l.backward = backward_route_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_route_layer_gpu;
|
||||
l.backward_gpu = backward_route_layer_gpu;
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
|
||||
l.output_gpu = cuda_make_array(l.output, outputs*batch);
|
||||
#endif
|
||||
return l;
|
||||
}
|
||||
|
||||
void forward_route_layer(const route_layer l, network net)
|
||||
void forward_route_layer(const route_layer l, network_state state)
|
||||
{
|
||||
int i, j;
|
||||
int offset = 0;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
int index = l.input_layers[i];
|
||||
float *input = net.layers[index].output;
|
||||
float *input = state.net.layers[index].output;
|
||||
int input_size = l.input_sizes[i];
|
||||
for(j = 0; j < l.batch; ++j){
|
||||
copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1);
|
||||
@ -45,13 +51,13 @@ void forward_route_layer(const route_layer l, network net)
|
||||
}
|
||||
}
|
||||
|
||||
void backward_route_layer(const route_layer l, network net)
|
||||
void backward_route_layer(const route_layer l, network_state state)
|
||||
{
|
||||
int i, j;
|
||||
int offset = 0;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
int index = l.input_layers[i];
|
||||
float *delta = net.layers[index].delta;
|
||||
float *delta = state.net.layers[index].delta;
|
||||
int input_size = l.input_sizes[i];
|
||||
for(j = 0; j < l.batch; ++j){
|
||||
axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1);
|
||||
@ -61,13 +67,13 @@ void backward_route_layer(const route_layer l, network net)
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
void forward_route_layer_gpu(const route_layer l, network net)
|
||||
void forward_route_layer_gpu(const route_layer l, network_state state)
|
||||
{
|
||||
int i, j;
|
||||
int offset = 0;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
int index = l.input_layers[i];
|
||||
float *input = net.layers[index].output_gpu;
|
||||
float *input = state.net.layers[index].output_gpu;
|
||||
int input_size = l.input_sizes[i];
|
||||
for(j = 0; j < l.batch; ++j){
|
||||
copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1);
|
||||
@ -76,13 +82,13 @@ void forward_route_layer_gpu(const route_layer l, network net)
|
||||
}
|
||||
}
|
||||
|
||||
void backward_route_layer_gpu(const route_layer l, network net)
|
||||
void backward_route_layer_gpu(const route_layer l, network_state state)
|
||||
{
|
||||
int i, j;
|
||||
int offset = 0;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
int index = l.input_layers[i];
|
||||
float *delta = net.layers[index].delta_gpu;
|
||||
float *delta = state.net.layers[index].delta_gpu;
|
||||
int input_size = l.input_sizes[i];
|
||||
for(j = 0; j < l.batch; ++j){
|
||||
axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1);
|
||||
|
@ -6,12 +6,12 @@
|
||||
typedef layer route_layer;
|
||||
|
||||
route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size);
|
||||
void forward_route_layer(const route_layer l, network net);
|
||||
void backward_route_layer(const route_layer l, network net);
|
||||
void forward_route_layer(const route_layer l, network_state state);
|
||||
void backward_route_layer(const route_layer l, network_state state);
|
||||
|
||||
#ifdef GPU
|
||||
void forward_route_layer_gpu(const route_layer l, network net);
|
||||
void backward_route_layer_gpu(const route_layer l, network net);
|
||||
void forward_route_layer_gpu(const route_layer l, network_state state);
|
||||
void backward_route_layer_gpu(const route_layer l, network_state state);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -23,7 +23,13 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
|
||||
|
||||
l.delta = calloc(l.outputs*batch, sizeof(float));
|
||||
l.output = calloc(l.outputs*batch, sizeof(float));;
|
||||
|
||||
l.forward = forward_shortcut_layer;
|
||||
l.backward = backward_shortcut_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_shortcut_layer_gpu;
|
||||
l.backward_gpu = backward_shortcut_layer_gpu;
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch);
|
||||
l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
|
||||
#endif
|
||||
|
@ -19,7 +19,13 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups)
|
||||
l.outputs = inputs;
|
||||
l.output = calloc(inputs*batch, sizeof(float));
|
||||
l.delta = calloc(inputs*batch, sizeof(float));
|
||||
|
||||
l.forward = forward_softmax_layer;
|
||||
l.backward = backward_softmax_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_softmax_layer_gpu;
|
||||
l.backward_gpu = backward_softmax_layer_gpu;
|
||||
|
||||
l.output_gpu = cuda_make_array(l.output, inputs*batch);
|
||||
l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
|
||||
#endif
|
||||
|
21
src/utils.c
21
src/utils.c
@ -135,23 +135,20 @@ void pm(int M, int N, float *A)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*
 * Replace the first occurrence of `orig` in `str` with `rep`, writing the
 * result to `output`.
 *
 * str    - NUL-terminated input string; it is never modified through `str`.
 * orig   - substring to search for.
 * rep    - replacement text.
 * output - destination buffer supplied by the caller. It must be large enough
 *          for the result (the function has no way to know its size). It MAY
 *          be the same buffer as `str`, which is how callers chain several
 *          replacements on one path buffer.
 *
 * If `orig` does not occur in `str`, `output` receives an unchanged copy.
 * Input longer than the 4095 characters that fit in the scratch buffer is
 * truncated rather than overflowing the stack.
 */
void find_replace(char *str, char *orig, char *rep, char *output)
{
    char buffer[4096] = {0};
    char *p;

    // Work on a bounded private copy so that `output` is allowed to alias `str`.
    snprintf(buffer, sizeof(buffer), "%s", str);
    if(!(p = strstr(buffer, orig))){  // Is 'orig' even in 'str'?
        // Copy from the scratch buffer, not from `str`: sprintf with
        // overlapping source and destination is undefined behavior.
        sprintf(output, "%s", buffer);
        return;
    }

    // Terminate the prefix at the match, then stitch prefix + rep + suffix.
    *p = '\0';

    sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig));
}
|
||||
|
||||
float sec(clock_t clocks)
|
||||
|
@ -19,7 +19,7 @@ void read_all(int fd, char *buffer, size_t bytes);
|
||||
void write_all(int fd, char *buffer, size_t bytes);
|
||||
int read_all_fail(int fd, char *buffer, size_t bytes);
|
||||
int write_all_fail(int fd, char *buffer, size_t bytes);
|
||||
char *find_replace(char *str, char *orig, char *rep);
|
||||
void find_replace(char *str, char *orig, char *rep, char *output);
|
||||
void error(const char *s);
|
||||
void malloc_error();
|
||||
void file_error(char *s);
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
image get_image_from_stream(CvCapture *cap);
|
||||
#endif
|
||||
|
||||
void extract_voxel(char *lfile, char *rfile, char *prefix)
|
||||
|
@ -1,86 +0,0 @@
|
||||
#include "xnor_layer.h"
|
||||
#include "binary_convolution.h"
|
||||
#include "convolutional_layer.h"
|
||||
|
||||
/*
 * Construct an XNOR layer for a batch of h x w x c inputs with n filters of
 * size x size, the given stride and pad setting.
 *
 * Allocates zeroed filter, bias and output buffers. When batch_normalize is
 * non-zero it also allocates the per-filter scale (initialised to 1), mean,
 * variance and rolling statistics arrays. Allocation results are not checked.
 * Returns the populated layer by value.
 */
layer make_xnor_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize)
{
    layer l = {0};
    l.type = XNOR;

    // Input geometry and convolution parameters.
    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;
    l.batch_normalize = batch_normalize;

    // One size x size x c filter per output channel, plus one bias each.
    l.filters = calloc(c*n*size*size, sizeof(float));
    l.biases = calloc(n, sizeof(float));

    // Output geometry is derived from the fields set above.
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));

    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        int k = 0;
        while(k < n){
            l.scales[k] = 1;
            ++k;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
    }

    l.activation = activation;

    fprintf(stderr, "XNOR Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
|
||||
|
||||
/*
 * Forward pass of the XNOR layer.
 *
 * Builds a temporary binary convolution layer from l's geometry, feeds it the
 * real-valued input (state.input) and the real-valued filters (l.filters),
 * runs the binary forward pass, copies l.outputs floats of the result into
 * l.output and frees the temporary layer.
 */
void forward_xnor_layer(const layer l, network_state state)
{
    // Padding handed to the binary layer: l.pad scaled by half the kernel size.
    int padding = l.pad * (l.size/2);

    // Required: create the binary layer (filters, channels, input w/h,
    // kernel w/h, stride, padding).
    ai2_bin_conv_layer bin = ai2_make_bin_conv_layer(l.n, l.c, l.w, l.h, l.size, l.size, l.stride, padding);

    // Optional: supply the real-valued input. Per the original author's note
    // the call binarizes it channel-wise; if omitted the input stays 0-valued.
    ai2_setFltInput_unpadded(&bin, state.input);

    // Optional: supply the real-valued weights. Per the original author's
    // note they are binarized by the call; if omitted they stay 0-valued.
    ai2_setFltWeights(&bin, l.filters);

    // Required: run the binary convolution.
    ai2_bin_forward(&bin);

    // Fetch the float output (original note: sized by the padded output
    // dimensions) and keep l.outputs values of it.
    float *result = ai2_getFltOutput(&bin);
    memcpy(l.output, result, l.outputs*sizeof(float));

    // Required: release the temporary binary layer.
    ai2_free_bin_conv_layer(&bin);
}
|
@ -1,11 +0,0 @@
|
||||
#ifndef XNOR_LAYER_H
|
||||
#define XNOR_LAYER_H
|
||||
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
|
||||
layer make_xnor_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalization);
|
||||
void forward_xnor_layer(const layer l, network_state state);
|
||||
|
||||
#endif
|
||||
|
79
src/yolo.c
79
src/yolo.c
@ -11,7 +11,6 @@
|
||||
#endif
|
||||
|
||||
char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
|
||||
image voc_labels[20];
|
||||
|
||||
void train_yolo(char *cfgfile, char *weightfile)
|
||||
{
|
||||
@ -88,34 +87,6 @@ void train_yolo(char *cfgfile, char *weightfile)
|
||||
save_weights(net, buff);
|
||||
}
|
||||
|
||||
/*
 * Decode a flat YOLO prediction vector into boxes and per-class scores.
 *
 * The indexing below reads `predictions` as three consecutive regions:
 * side*side*classes class values, then side*side*num per-box scales, then
 * side*side*num groups of four box coordinates.
 *
 * For every grid cell and every one of its `num` boxes this fills
 * boxes[cell*num + n] (x/y offset by the cell column/row, divided by `side`
 * and scaled by w/h; width/height optionally squared when `square` is set)
 * and probs[cell*num + n][j] = scale * class value, zeroed when not above
 * `thresh`. When only_objectness is non-zero, entry 0 of each probs row is
 * overwritten with the raw box scale.
 */
void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
{
    int cell, slot, cls;
    for (cell = 0; cell < side*side; ++cell){
        int row = cell / side;
        int col = cell % side;
        for(slot = 0; slot < num; ++slot){
            int index = cell*num + slot;
            int scale_index = side*side*classes + cell*num + slot;
            int coord_index = side*side*(classes + num) + (cell*num + slot)*4;
            int class_base = cell*classes;
            float scale = predictions[scale_index];
            box *b = boxes + index;

            b->x = (predictions[coord_index + 0] + col) / side * w;
            b->y = (predictions[coord_index + 1] + row) / side * h;
            b->w = pow(predictions[coord_index + 2], (square?2:1)) * w;
            b->h = pow(predictions[coord_index + 3], (square?2:1)) * h;

            for(cls = 0; cls < classes; ++cls){
                float prob = scale*predictions[class_base+cls];
                if(prob > thresh){
                    probs[index][cls] = prob;
                } else {
                    probs[index][cls] = 0;
                }
            }

            if(only_objectness){
                probs[index][0] = scale;
            }
        }
    }
}
|
||||
|
||||
void print_yolo_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h)
|
||||
{
|
||||
int i, j;
|
||||
@ -155,8 +126,6 @@ void validate_yolo(char *cfgfile, char *weightfile)
|
||||
|
||||
layer l = net.layers[net.n-1];
|
||||
int classes = l.classes;
|
||||
int square = l.sqrt;
|
||||
int side = l.side;
|
||||
|
||||
int j;
|
||||
FILE **fps = calloc(classes, sizeof(FILE *));
|
||||
@ -165,9 +134,9 @@ void validate_yolo(char *cfgfile, char *weightfile)
|
||||
snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
|
||||
fps[j] = fopen(buff, "w");
|
||||
}
|
||||
box *boxes = calloc(side*side*l.n, sizeof(box));
|
||||
float **probs = calloc(side*side*l.n, sizeof(float *));
|
||||
for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
||||
box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
|
||||
float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
|
||||
for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
@ -213,12 +182,12 @@ void validate_yolo(char *cfgfile, char *weightfile)
|
||||
char *path = paths[i+t-nthreads];
|
||||
char *id = basecfg(path);
|
||||
float *X = val_resized[t].data;
|
||||
float *predictions = network_predict(net, X);
|
||||
network_predict(net, X);
|
||||
int w = val[t].w;
|
||||
int h = val[t].h;
|
||||
convert_detections(predictions, classes, l.n, square, side, w, h, thresh, probs, boxes, 0);
|
||||
if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, iou_thresh);
|
||||
print_yolo_detections(fps, id, boxes, probs, side*side*l.n, classes, w, h);
|
||||
get_detection_boxes(l, w, h, thresh, probs, boxes, 0);
|
||||
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, classes, iou_thresh);
|
||||
print_yolo_detections(fps, id, boxes, probs, l.side*l.side*l.n, classes, w, h);
|
||||
free(id);
|
||||
free_image(val[t]);
|
||||
free_image(val_resized[t]);
|
||||
@ -243,7 +212,6 @@ void validate_yolo_recall(char *cfgfile, char *weightfile)
|
||||
|
||||
layer l = net.layers[net.n-1];
|
||||
int classes = l.classes;
|
||||
int square = l.sqrt;
|
||||
int side = l.side;
|
||||
|
||||
int j, k;
|
||||
@ -274,14 +242,15 @@ void validate_yolo_recall(char *cfgfile, char *weightfile)
|
||||
image orig = load_image_color(path, 0, 0);
|
||||
image sized = resize_image(orig, net.w, net.h);
|
||||
char *id = basecfg(path);
|
||||
float *predictions = network_predict(net, sized.data);
|
||||
convert_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1);
|
||||
network_predict(net, sized.data);
|
||||
get_detection_boxes(l, orig.w, orig.h, thresh, probs, boxes, 1);
|
||||
if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms);
|
||||
|
||||
char *labelpath = find_replace(path, "images", "labels");
|
||||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
char labelpath[4096];
|
||||
find_replace(path, "images", "labels", labelpath);
|
||||
find_replace(labelpath, "JPEGImages", "labels", labelpath);
|
||||
find_replace(labelpath, ".jpg", ".txt", labelpath);
|
||||
find_replace(labelpath, ".JPEG", ".txt", labelpath);
|
||||
|
||||
int num_labels = 0;
|
||||
box_label *truth = read_boxes(labelpath, &num_labels);
|
||||
@ -315,7 +284,7 @@ void validate_yolo_recall(char *cfgfile, char *weightfile)
|
||||
|
||||
void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
{
|
||||
|
||||
image *alphabet = load_alphabet();
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
@ -345,12 +314,12 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
image sized = resize_image(im, net.w, net.h);
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
network_predict(net, X);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
|
||||
get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1);
|
||||
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
||||
//draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
|
||||
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
|
||||
//draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
|
||||
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
|
||||
save_image(im, "predictions");
|
||||
show_image(im, "predictions");
|
||||
|
||||
@ -366,13 +335,7 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
|
||||
void run_yolo(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < 20; ++i){
|
||||
char buff[256];
|
||||
sprintf(buff, "data/labels/%s.png", voc_names[i]);
|
||||
voc_labels[i] = load_image_color(buff, 0, 0);
|
||||
}
|
||||
|
||||
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
|
||||
float thresh = find_float_arg(argc, argv, "-thresh", .2);
|
||||
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||
int frame_skip = find_int_arg(argc, argv, "-s", 0);
|
||||
@ -388,5 +351,5 @@ void run_yolo(int argc, char **argv)
|
||||
else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, voc_labels, 20, frame_skip);
|
||||
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user