mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
idk
This commit is contained in:
parent
f996bd59a6
commit
40cc104639
4
Makefile
4
Makefile
@ -1,5 +1,5 @@
|
||||
GPU=0
|
||||
OPENCV=0
|
||||
GPU=1
|
||||
OPENCV=1
|
||||
DEBUG=0
|
||||
|
||||
ARCH= --gpu-architecture=compute_20 --gpu-code=compute_20
|
||||
|
10
cfg/yolo.cfg
10
cfg/yolo.cfg
@ -1,17 +1,17 @@
|
||||
[net]
|
||||
batch=64
|
||||
subdivisions=64
|
||||
subdivisions=4
|
||||
height=448
|
||||
width=448
|
||||
channels=3
|
||||
learning_rate=0.001
|
||||
learning_rate=0.01
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
|
||||
policy=steps
|
||||
steps=50, 5000
|
||||
scales=10, .1
|
||||
max_batches = 8000
|
||||
steps=20000
|
||||
scales=.1
|
||||
max_batches = 35000
|
||||
|
||||
[crop]
|
||||
crop_width=448
|
||||
|
@ -28,6 +28,7 @@ typedef struct {
|
||||
ACTIVATION activation;
|
||||
COST_TYPE cost_type;
|
||||
int batch;
|
||||
int forced;
|
||||
int inputs;
|
||||
int outputs;
|
||||
int truths;
|
||||
|
@ -187,6 +187,7 @@ region_layer parse_region(list *options, size_params params)
|
||||
layer.sqrt = option_find_int(options, "sqrt", 0);
|
||||
|
||||
layer.coord_scale = option_find_float(options, "coord_scale", 1);
|
||||
layer.forced = option_find_int(options, "forced", 0);
|
||||
layer.object_scale = option_find_float(options, "object_scale", 1);
|
||||
layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
|
||||
layer.class_scale = option_find_float(options, "class_scale", 1);
|
||||
|
@ -82,9 +82,12 @@ void forward_region_layer(const region_layer l, network_state state)
|
||||
|
||||
int best_index = -1;
|
||||
float best_iou = 0;
|
||||
float best_rmse = 4;
|
||||
float best_rmse = 20;
|
||||
|
||||
if (!is_obj) continue;
|
||||
if (!is_obj){
|
||||
//printf(".");
|
||||
continue;
|
||||
}
|
||||
|
||||
int class_index = index + i*l.classes;
|
||||
for(j = 0; j < l.classes; ++j) {
|
||||
@ -123,18 +126,38 @@ void forward_region_layer(const region_layer l, network_state state)
|
||||
}
|
||||
}
|
||||
}
|
||||
int p_index = index + locations*l.classes + i*l.n + best_index;
|
||||
*(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
|
||||
*(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);
|
||||
avg_obj += l.output[p_index];
|
||||
l.delta[p_index+0] = l.object_scale * (1.-l.output[p_index]);
|
||||
|
||||
if(l.rescore){
|
||||
l.delta[p_index+0] = l.object_scale * (best_iou - l.output[p_index]);
|
||||
if(l.forced){
|
||||
if(truth.w*truth.h < .1){
|
||||
best_index = 1;
|
||||
}else{
|
||||
best_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
|
||||
int tbox_index = truth_index + 1 + l.classes;
|
||||
|
||||
box out = float_to_box(l.output + box_index);
|
||||
out.x /= l.side;
|
||||
out.y /= l.side;
|
||||
if (l.sqrt) {
|
||||
out.w = out.w*out.w;
|
||||
out.h = out.h*out.h;
|
||||
}
|
||||
float iou = box_iou(out, truth);
|
||||
|
||||
//printf("%d", best_index);
|
||||
int p_index = index + locations*l.classes + i*l.n + best_index;
|
||||
*(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
|
||||
*(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);
|
||||
avg_obj += l.output[p_index];
|
||||
l.delta[p_index] = l.object_scale * (1.-l.output[p_index]);
|
||||
|
||||
if(l.rescore){
|
||||
l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
|
||||
}
|
||||
|
||||
l.delta[box_index+0] = l.coord_scale*(state.truth[tbox_index + 0] - l.output[box_index + 0]);
|
||||
l.delta[box_index+1] = l.coord_scale*(state.truth[tbox_index + 1] - l.output[box_index + 1]);
|
||||
l.delta[box_index+2] = l.coord_scale*(state.truth[tbox_index + 2] - l.output[box_index + 2]);
|
||||
@ -144,14 +167,15 @@ void forward_region_layer(const region_layer l, network_state state)
|
||||
l.delta[box_index+3] = l.coord_scale*(sqrt(state.truth[tbox_index + 3]) - l.output[box_index + 3]);
|
||||
}
|
||||
|
||||
*(l.cost) += pow(1-best_iou, 2);
|
||||
avg_iou += best_iou;
|
||||
*(l.cost) += pow(1-iou, 2);
|
||||
avg_iou += iou;
|
||||
++count;
|
||||
}
|
||||
if(l.softmax){
|
||||
gradient_array(l.output + index + locations*l.classes, locations*l.n*(1+l.coords),
|
||||
LOGISTIC, l.delta + index + locations*l.classes);
|
||||
}
|
||||
//printf("\n");
|
||||
}
|
||||
printf("Region Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
|
||||
}
|
||||
|
71
src/swag.c
71
src/swag.c
@ -1,4 +1,5 @@
|
||||
#include "network.h"
|
||||
#include "region_layer.h"
|
||||
#include "detection_layer.h"
|
||||
#include "cost_layer.h"
|
||||
#include "utils.h"
|
||||
@ -11,40 +12,37 @@
|
||||
|
||||
char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
|
||||
|
||||
void draw_swag(image im, float *box, int side, int objectness, char *label, float thresh)
|
||||
void draw_swag(image im, float *predictions, int side, int num, char *label, float thresh)
|
||||
{
|
||||
int classes = 20;
|
||||
int elems = 4+classes+objectness;
|
||||
int j;
|
||||
int r, c;
|
||||
int i,n;
|
||||
|
||||
for(r = 0; r < side; ++r){
|
||||
for(c = 0; c < side; ++c){
|
||||
j = (r*side + c) * elems;
|
||||
float scale = 1;
|
||||
if(objectness) scale = 1 - box[j++];
|
||||
int class = max_index(box+j, classes);
|
||||
if(scale * box[j+class] > thresh){
|
||||
int width = sqrt(scale*box[j+class])*5 + 1;
|
||||
printf("%f %s\n", scale * box[j+class], voc_names[class]);
|
||||
for(i = 0; i < side*side; ++i){
|
||||
int row = i / side;
|
||||
int col = i % side;
|
||||
for(n = 0; n < num; ++n){
|
||||
int p_index = side*side*classes + i*num + n;
|
||||
int box_index = side*side*(classes + num) + (i*num + n)*4;
|
||||
int class_index = i*classes;
|
||||
float scale = predictions[p_index];
|
||||
int class = max_index(predictions+class_index, classes);
|
||||
float prob = scale * predictions[class_index + class];
|
||||
if(prob > thresh){
|
||||
int width = sqrt(prob)*5 + 1;
|
||||
printf("%f %s\n", prob, voc_names[class]);
|
||||
float red = get_color(0,class,classes);
|
||||
float green = get_color(1,class,classes);
|
||||
float blue = get_color(2,class,classes);
|
||||
box b = float_to_box(predictions+box_index);
|
||||
b.x = (b.x + col)/side;
|
||||
b.y = (b.y + row)/side;
|
||||
b.w = b.w*b.w;
|
||||
b.h = b.h*b.h;
|
||||
|
||||
j += classes;
|
||||
float x = box[j+0];
|
||||
float y = box[j+1];
|
||||
x = (x+c)/side;
|
||||
y = (y+r)/side;
|
||||
float w = box[j+2]; //*maxwidth;
|
||||
float h = box[j+3]; //*maxheight;
|
||||
h = h*h;
|
||||
w = w*w;
|
||||
|
||||
int left = (x-w/2)*im.w;
|
||||
int right = (x+w/2)*im.w;
|
||||
int top = (y-h/2)*im.h;
|
||||
int bot = (y+h/2)*im.h;
|
||||
int left = (b.x-b.w/2)*im.w;
|
||||
int right = (b.x+b.w/2)*im.w;
|
||||
int top = (b.y-b.h/2)*im.h;
|
||||
int bot = (b.y+b.h/2)*im.h;
|
||||
draw_box_width(im, left, top, right, bot, width, red, green, blue);
|
||||
}
|
||||
}
|
||||
@ -103,13 +101,13 @@ void train_swag(char *cfgfile, char *weightfile)
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
|
||||
/*
|
||||
image im = float_to_image(net.w, net.h, 3, train.X.vals[113]);
|
||||
image copy = copy_image(im);
|
||||
draw_swag(copy, train.y.vals[113], 7, "truth");
|
||||
cvWaitKey(0);
|
||||
free_image(copy);
|
||||
*/
|
||||
/*
|
||||
image im = float_to_image(net.w, net.h, 3, train.X.vals[113]);
|
||||
image copy = copy_image(im);
|
||||
draw_swag(copy, train.y.vals[113], 7, "truth");
|
||||
cvWaitKey(0);
|
||||
free_image(copy);
|
||||
*/
|
||||
|
||||
time=clock();
|
||||
float loss = train_network(net, train);
|
||||
@ -270,7 +268,7 @@ void test_swag(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
detection_layer layer = get_network_detection_layer(net);
|
||||
region_layer layer = net.layers[net.n-1];
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
clock_t time;
|
||||
@ -292,7 +290,8 @@ void test_swag(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
draw_swag(im, predictions, 7, layer.objectness, "predictions", thresh);
|
||||
draw_swag(im, predictions, layer.side, layer.n, "predictions", thresh);
|
||||
show_image(sized, "resized");
|
||||
free_image(im);
|
||||
free_image(sized);
|
||||
#ifdef OPENCV
|
||||
|
@ -65,7 +65,6 @@ void train_yolo(char *cfgfile, char *weightfile)
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
detection_layer layer = get_network_detection_layer(net);
|
||||
int imgs = 128;
|
||||
int i = *net.seen/imgs;
|
||||
|
||||
@ -74,11 +73,16 @@ void train_yolo(char *cfgfile, char *weightfile)
|
||||
int N = plist->size;
|
||||
paths = (char **)list_to_array(plist);
|
||||
|
||||
if(i*imgs > N*80){
|
||||
net.layers[net.n-1].objectness = 0;
|
||||
net.layers[net.n-1].joint = 1;
|
||||
}
|
||||
if(i*imgs > N*120){
|
||||
net.layers[net.n-1].rescore = 1;
|
||||
}
|
||||
data train, buffer;
|
||||
|
||||
detection_layer layer = get_network_detection_layer(net);
|
||||
int classes = layer.classes;
|
||||
int background = layer.objectness;
|
||||
int side = sqrt(get_detection_layer_locations(layer));
|
||||
|
Loading…
Reference in New Issue
Block a user