mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
In chess, the pawns go first
This commit is contained in:
parent
d856ec56fb
commit
88b9ecb414
2
Makefile
2
Makefile
@ -48,7 +48,7 @@ CFLAGS+= -DCUDNN
|
||||
LDFLAGS+= -lcudnn
|
||||
endif
|
||||
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o regressor.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o lsd.o super.o voxel.o tree.o
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o segmenter.o regressor.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o lsd.o super.o voxel.o tree.o
|
||||
ifeq ($(GPU), 1)
|
||||
LDFLAGS+= -lstdc++
|
||||
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
|
||||
|
@ -700,7 +700,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
|
||||
float *predictions = network_predict(net, X);
|
||||
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1, 1);
|
||||
top_k(predictions, net.outputs, top, indexes);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
for(i = 0; i < top; ++i){
|
||||
int index = indexes[i];
|
||||
//if(net.hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net.hierarchy->parent[index] >= 0) ? names[net.hierarchy->parent[index]] : "Root");
|
||||
|
@ -21,6 +21,7 @@ extern void run_dice(int argc, char **argv);
|
||||
extern void run_compare(int argc, char **argv);
|
||||
extern void run_classifier(int argc, char **argv);
|
||||
extern void run_regressor(int argc, char **argv);
|
||||
extern void run_segmenter(int argc, char **argv);
|
||||
extern void run_char_rnn(int argc, char **argv);
|
||||
extern void run_vid_rnn(int argc, char **argv);
|
||||
extern void run_tag(int argc, char **argv);
|
||||
@ -442,6 +443,8 @@ int main(int argc, char **argv)
|
||||
run_classifier(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "regressor")){
|
||||
run_regressor(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "segmenter")){
|
||||
run_segmenter(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "art")){
|
||||
run_art(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "tag")){
|
||||
|
128
src/data.c
128
src/data.c
@ -116,7 +116,7 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,
|
||||
if(center){
|
||||
crop = center_crop_image(im, size, size);
|
||||
} else {
|
||||
crop = random_augment_image(im, angle, aspect, min, max, size);
|
||||
crop = random_augment_image(im, angle, aspect, min, max, size, size);
|
||||
}
|
||||
int flip = rand()%2;
|
||||
if (flip) flip_image(crop);
|
||||
@ -511,6 +511,119 @@ void free_data(data d)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Expand a run-length encoding into the pixel buffer of `im`.
 *
 * rle[0..n-1] are run lengths. Runs alternate between the values 0 and 1,
 * starting with 0, and are written sequentially from im.data[0]. Whatever
 * remains of the im.h*im.w*im.c buffer after the last run is filled with the
 * value the next run would have had.
 *
 * Runs that would extend past the end of the buffer are truncated, so a
 * malformed encoding can no longer write out of bounds.
 */
void load_rle(image im, int *rle, int n)
{
    int total = im.h*im.w*im.c;
    int count = 0;
    int curr = 0;
    int i,j;
    for(i = 0; i < n; ++i){
        /* Stop writing once the buffer is full, whatever the run length says. */
        for(j = 0; j < rle[i] && count < total; ++j){
            im.data[count++] = curr;
        }
        curr = 1 - curr;
    }
    for(; count < total; ++count){
        im.data[count] = curr;
    }
}
|
||||
|
||||
/*
 * OR a single-plane mask into channel `c` of `dest`.
 *
 * For each of the src.w*src.h pixels, a non-zero value in `src` sets the
 * matching pixel in channel `c` of `dest` to 1; zero pixels leave `dest`
 * untouched.
 *
 * A channel index outside [0, dest.c) is reported on stderr and ignored
 * instead of writing outside the destination buffer.
 * NOTE(review): src and dest are assumed to have the same width and height;
 * confirm against callers.
 */
void or_image(image src, image dest, int c)
{
    int i;
    if(c < 0 || c >= dest.c){
        fprintf(stderr, "or_image: channel %d out of range [0, %d)\n", c, dest.c);
        return;
    }
    for(i = 0; i < src.w*src.h; ++i){
        if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1;
    }
}
|
||||
|
||||
/*
 * Fill the last channel of `m` with a background mask: a pixel ends up 1
 * when every other channel is zero at that position, and 0 otherwise.
 */
void fill_bg_mask(image m)
{
    int plane = m.w*m.h;
    float *bg = m.data + plane*(m.c-1);
    int p, ch;

    /* Start with everything marked as background... */
    for(p = 0; p < plane; ++p){
        bg[p] = 1;
    }
    /* ...then clear each pixel that any foreground channel claims. */
    for(ch = 0; ch < m.c-1; ++ch){
        float *fg = m.data + ch*plane;
        for(p = 0; p < plane; ++p){
            if(bg[p] && fg[p]) bg[p] = 0;
        }
    }
}
|
||||
|
||||
/*
 * Build the per-class segmentation mask for the image at `path`.
 *
 * The label file name is derived from `path` by replacing "images" or
 * "JPEGImages" with "mask" and the .jpg/.JPG/.JPEG extension with ".txt".
 * Each record in that file is a class id followed by one whitespace-free
 * token, which read_intlist turns into a run-length list
 * (presumably comma-separated; confirm against read_intlist).
 *
 * Returns a w x h image with classes+1 channels: channel `id` is the OR of
 * all masks listed for that id, and the last channel is the background
 * mask computed by fill_bg_mask. The caller owns the returned image.
 *
 * Changes from the previous version: the token read is bounded to the size
 * of `buff`, and records whose id is outside [0, classes] are skipped with
 * a warning instead of being used as a channel index.
 */
image get_segmentation_image(char *path, int w, int h, int classes)
{
    char labelpath[4096];
    find_replace(path, "images", "mask", labelpath);
    find_replace(labelpath, "JPEGImages", "mask", labelpath);
    find_replace(labelpath, ".jpg", ".txt", labelpath);
    find_replace(labelpath, ".JPG", ".txt", labelpath);
    find_replace(labelpath, ".JPEG", ".txt", labelpath);
    image mask = make_image(w, h, classes+1);
    FILE *file = fopen(labelpath, "r");
    /* NOTE(review): file_error is assumed not to return; confirm. */
    if(!file) file_error(labelpath);
    char buff[32788];
    int id;
    image part = make_image(w, h, 1);
    /* Field width is sizeof(buff)-1 so an over-long token cannot overflow buff. */
    while(fscanf(file, "%d %32787s", &id, buff) == 2){
        if(id < 0 || id > classes){
            fprintf(stderr, "%s: skipping mask with class id %d (expected 0..%d)\n", labelpath, id, classes);
            continue;
        }
        int n = 0;
        int *rle = read_intlist(buff, &n, 0);
        load_rle(part, rle, n);
        or_image(part, mask, id);
        free(rle);
    }
    fill_bg_mask(mask);
    fclose(file);
    free_image(part);
    return mask;
}
|
||||
|
||||
data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure)
|
||||
{
|
||||
char **random_paths = get_random_paths(paths, n, m);
|
||||
int i;
|
||||
data d = {0};
|
||||
d.shallow = 0;
|
||||
|
||||
d.X.rows = n;
|
||||
d.X.vals = calloc(d.X.rows, sizeof(float*));
|
||||
d.X.cols = h*w*3;
|
||||
|
||||
|
||||
d.y.rows = n;
|
||||
d.y.cols = h*w*(classes+1);
|
||||
d.y.vals = calloc(d.X.rows, sizeof(float*));
|
||||
|
||||
for(i = 0; i < n; ++i){
|
||||
image orig = load_image_color(random_paths[i], 0, 0);
|
||||
augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h);
|
||||
image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect);
|
||||
|
||||
int flip = rand()%2;
|
||||
if(flip) flip_image(sized);
|
||||
random_distort_image(sized, hue, saturation, exposure);
|
||||
d.X.vals[i] = sized.data;
|
||||
|
||||
image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes);
|
||||
//image mask = make_image(orig.w, orig.h, classes+1);
|
||||
image sized_m = rotate_crop_image(mask, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect);
|
||||
|
||||
if(flip) flip_image(sized_m);
|
||||
d.y.vals[i] = sized_m.data;
|
||||
|
||||
free_image(orig);
|
||||
free_image(mask);
|
||||
|
||||
/*
|
||||
image rgb = mask_to_rgb(sized_m, classes);
|
||||
show_image(rgb, "part");
|
||||
show_image(sized, "orig");
|
||||
cvWaitKey(0);
|
||||
free_image(rgb);
|
||||
*/
|
||||
}
|
||||
free(random_paths);
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure)
|
||||
{
|
||||
char **random_paths = get_random_paths(paths, n, m);
|
||||
@ -698,7 +811,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
|
||||
image orig = load_image_color(random_paths[i], 0, 0);
|
||||
image sized = make_image(w, h, orig.c);
|
||||
fill_image(sized, .5);
|
||||
|
||||
|
||||
float dw = jitter * orig.w;
|
||||
float dh = jitter * orig.h;
|
||||
|
||||
@ -706,7 +819,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
|
||||
float scale = rand_uniform(.25, 2);
|
||||
|
||||
float nw, nh;
|
||||
|
||||
|
||||
if(new_ar < 1){
|
||||
nh = scale * h;
|
||||
nw = nh * new_ar;
|
||||
@ -752,6 +865,8 @@ void *load_thread(void *ptr)
|
||||
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
|
||||
} else if (a.type == WRITING_DATA){
|
||||
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
|
||||
} else if (a.type == SEGMENTATION_DATA){
|
||||
*a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == REGION_DATA){
|
||||
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == DETECTION_DATA){
|
||||
@ -811,6 +926,13 @@ void *load_threads(void *ptr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Run one data load synchronously on the calling thread.
 *
 * The arguments are copied to the heap because load_thread takes a pointer.
 * NOTE(review): load_thread is presumably responsible for freeing that
 * copy; confirm.
 */
void load_data_blocking(load_args args)
{
    struct load_args *heap_args = calloc(1, sizeof(*heap_args));
    *heap_args = args;
    load_thread(heap_args);
}
|
||||
|
||||
pthread_t load_data(load_args args)
|
||||
{
|
||||
pthread_t thread;
|
||||
|
@ -28,7 +28,7 @@ typedef struct{
|
||||
} data;
|
||||
|
||||
typedef enum {
|
||||
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA
|
||||
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA
|
||||
} data_type;
|
||||
|
||||
typedef struct load_args{
|
||||
@ -72,6 +72,7 @@ typedef struct{
|
||||
void free_data(data d);
|
||||
|
||||
pthread_t load_data(load_args args);
|
||||
void load_data_blocking(load_args args);
|
||||
|
||||
pthread_t load_data_in_thread(load_args args);
|
||||
|
||||
|
42
src/image.c
42
src/image.c
@ -25,6 +25,25 @@ float get_color(int c, int x, int max)
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Render a multi-channel mask as a 3-channel colour image.
 *
 * Each mask channel j is given a colour via get_color, and every pixel of
 * the result is the sum over channels of (mask value * channel colour).
 * The caller owns the returned image.
 *
 * The previous version assigned instead of accumulating, so every channel
 * overwrote the one before it and only the last channel was visible.
 */
image mask_to_rgb(image mask)
{
    int n = mask.c;
    image im = make_image(mask.w, mask.h, 3);
    int plane = im.w*im.h;
    int i, j;

    /* Clear explicitly so accumulation does not depend on make_image zeroing. */
    for(i = 0; i < 3*plane; ++i){
        im.data[i] = 0;
    }
    for(j = 0; j < n; ++j){
        /* Scatter channel indices so neighbouring classes get distinct colours. */
        int offset = j*123457 % n;
        float red = get_color(2,offset,n);
        float green = get_color(1,offset,n);
        float blue = get_color(0,offset,n);
        for(i = 0; i < plane; ++i){
            float v = mask.data[j*plane + i];
            im.data[i + 0*plane] += v*red;
            im.data[i + 1*plane] += v*green;
            im.data[i + 2*plane] += v*blue;
        }
    }
    return im;
}
|
||||
|
||||
void composite_image(image source, image dest, int dx, int dy)
|
||||
{
|
||||
int x,y,k;
|
||||
@ -902,8 +921,9 @@ image random_crop_image(image im, int w, int h)
|
||||
return crop;
|
||||
}
|
||||
|
||||
image random_augment_image(image im, float angle, float aspect, int low, int high, int size)
|
||||
augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h)
|
||||
{
|
||||
augment_args a = {0};
|
||||
aspect = rand_scale(aspect);
|
||||
int r = rand_int(low, high);
|
||||
int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
|
||||
@ -911,15 +931,27 @@ image random_augment_image(image im, float angle, float aspect, int low, int hig
|
||||
|
||||
float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
|
||||
|
||||
float dx = (im.w*scale/aspect - size) / 2.;
|
||||
float dy = (im.h*scale - size) / 2.;
|
||||
float dx = (im.w*scale/aspect - w) / 2.;
|
||||
float dy = (im.h*scale - w) / 2.;
|
||||
if(dx < 0) dx = 0;
|
||||
if(dy < 0) dy = 0;
|
||||
dx = rand_uniform(-dx, dx);
|
||||
dy = rand_uniform(-dy, dy);
|
||||
|
||||
image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy, aspect);
|
||||
a.rad = rad;
|
||||
a.scale = scale;
|
||||
a.w = w;
|
||||
a.h = h;
|
||||
a.dx = dx;
|
||||
a.dy = dy;
|
||||
a.aspect = aspect;
|
||||
return a;
|
||||
}
|
||||
|
||||
/*
 * Draw random augmentation parameters for `im` and return the resulting
 * rotated/cropped image. The caller owns the returned image.
 */
image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h)
{
    augment_args params = random_augment_args(im, angle, aspect, low, high, w, h);
    return rotate_crop_image(im, params.rad, params.scale, params.w, params.h, params.dx, params.dy, params.aspect);
}
|
||||
|
||||
@ -1300,7 +1332,7 @@ void test_resize(char *filename)
|
||||
show_image(c4, "C4");
|
||||
#ifdef OPENCV
|
||||
while(1){
|
||||
image aug = random_augment_image(im, 0, .75, 320, 448, 320);
|
||||
image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320);
|
||||
show_image(aug, "aug");
|
||||
free_image(aug);
|
||||
|
||||
|
15
src/image.h
15
src/image.h
@ -19,6 +19,16 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
 * One set of augmentation parameters, as produced by random_augment_args
 * and passed on to rotate_crop_image. Keeping them in a struct lets the
 * same transform be applied to more than one image.
 */
typedef struct {
    int w, h;       /* requested output width and height */
    float scale;    /* scale factor */
    float rad;      /* rotation angle in radians */
    float dx, dy;   /* crop offsets */
    float aspect;   /* aspect-ratio factor */
} augment_args;
|
||||
|
||||
typedef struct {
|
||||
int h;
|
||||
int w;
|
||||
@ -37,6 +47,7 @@ void show_image_cv(image p, const char *name, IplImage *disp);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
image mask_to_rgb(image mask);
|
||||
float get_color(int c, int x, int max);
|
||||
void flip_image(image a);
|
||||
void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b);
|
||||
@ -48,9 +59,11 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs,
|
||||
image image_distance(image a, image b);
|
||||
void scale_image(image m, float s);
|
||||
image crop_image(image im, int dx, int dy, int w, int h);
|
||||
image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect);
|
||||
image center_crop_image(image im, int w, int h);
|
||||
image random_crop_image(image im, int w, int h);
|
||||
image random_augment_image(image im, float angle, float aspect, int low, int high, int size);
|
||||
image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h);
|
||||
augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h);
|
||||
void random_distort_image(image im, float hue, float saturation, float exposure);
|
||||
image letterbox_image(image im, int w, int h);
|
||||
void letterbox_image_into(image im, int w, int h, image boxed);
|
||||
|
@ -73,6 +73,7 @@ struct layer{
|
||||
int stride;
|
||||
int reverse;
|
||||
int flatten;
|
||||
int spatial;
|
||||
int pad;
|
||||
int sqrt;
|
||||
int flip;
|
||||
|
@ -258,6 +258,10 @@ softmax_layer parse_softmax(list *options, size_params params)
|
||||
layer.temperature = option_find_float_quiet(options, "temperature", 1);
|
||||
char *tree_file = option_find_str(options, "tree", 0);
|
||||
if (tree_file) layer.softmax_tree = read_tree(tree_file);
|
||||
layer.w = params.w;
|
||||
layer.h = params.h;
|
||||
layer.c = params.c;
|
||||
layer.spatial = option_find_float_quiet(options, "spatial", 0);
|
||||
return layer;
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,7 @@ layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
|
||||
l.bias_updates = calloc(n*2, sizeof(float));
|
||||
l.outputs = h*w*n*(classes + coords + 1);
|
||||
l.inputs = l.outputs;
|
||||
l.truths = 30*(5);
|
||||
l.truths = 30*(l.coords + 1);
|
||||
l.delta = calloc(batch*l.outputs, sizeof(float));
|
||||
l.output = calloc(batch*l.outputs, sizeof(float));
|
||||
int i;
|
||||
@ -153,7 +153,7 @@ void forward_region_layer(const layer l, network net)
|
||||
int index = entry_index(l, b, n*l.w*l.h, 0);
|
||||
activate_array(l.output + index, 2*l.w*l.h, LOGISTIC);
|
||||
index = entry_index(l, b, n*l.w*l.h, 4);
|
||||
activate_array(l.output + index, l.w*l.h, LOGISTIC);
|
||||
if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC);
|
||||
}
|
||||
}
|
||||
if (l.softmax_tree){
|
||||
@ -165,8 +165,8 @@ void forward_region_layer(const layer l, network net)
|
||||
count += group_size;
|
||||
}
|
||||
} else if (l.softmax){
|
||||
int index = entry_index(l, 0, 0, 5);
|
||||
softmax_cpu(net.input + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index);
|
||||
int index = entry_index(l, 0, 0, l.coords + !l.background);
|
||||
softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -184,9 +184,9 @@ void forward_region_layer(const layer l, network net)
|
||||
if(l.softmax_tree){
|
||||
int onlyclass = 0;
|
||||
for(t = 0; t < 30; ++t){
|
||||
box truth = float_to_box(net.truth + t*5 + b*l.truths, 1);
|
||||
box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1);
|
||||
if(!truth.x) break;
|
||||
int class = net.truth[t*5 + b*l.truths + 4];
|
||||
int class = net.truth[t*(l.coords + 1) + b*l.truths + 4];
|
||||
float maxp = 0;
|
||||
int maxi = 0;
|
||||
if(truth.x > 100000 && truth.y > 100000){
|
||||
@ -230,6 +230,7 @@ void forward_region_layer(const layer l, network net)
|
||||
int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4);
|
||||
avg_anyobj += l.output[obj_index];
|
||||
l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]);
|
||||
if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]);
|
||||
if (best_iou > l.thresh) {
|
||||
l.delta[obj_index] = 0;
|
||||
}
|
||||
@ -282,16 +283,19 @@ void forward_region_layer(const layer l, network net)
|
||||
avg_iou += iou;
|
||||
|
||||
//l.delta[best_index + 4] = iou - l.output[best_index + 4];
|
||||
int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4);
|
||||
int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords);
|
||||
avg_obj += l.output[obj_index];
|
||||
l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]);
|
||||
if (l.rescore) {
|
||||
l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]);
|
||||
}
|
||||
if(l.background){
|
||||
l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]);
|
||||
}
|
||||
|
||||
int class = net.truth[t*5 + b*l.truths + 4];
|
||||
int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords];
|
||||
if (l.map) class = l.map[class];
|
||||
int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 5);
|
||||
int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1);
|
||||
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
|
||||
++count;
|
||||
++class_count;
|
||||
@ -380,10 +384,10 @@ void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, f
|
||||
}
|
||||
int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
|
||||
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
|
||||
float scale = predictions[obj_index];
|
||||
float scale = l.background ? 1 : predictions[obj_index];
|
||||
boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h);
|
||||
|
||||
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5);
|
||||
int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background);
|
||||
if(l.softmax_tree){
|
||||
|
||||
hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h);
|
||||
@ -436,8 +440,8 @@ void forward_region_layer_gpu(const layer l, network net)
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = entry_index(l, b, n*l.w*l.h, 0);
|
||||
activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC);
|
||||
index = entry_index(l, b, n*l.w*l.h, 4);
|
||||
activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC);
|
||||
index = entry_index(l, b, n*l.w*l.h, l.coords);
|
||||
if(!l.background) activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC);
|
||||
}
|
||||
}
|
||||
if (l.softmax_tree){
|
||||
@ -450,9 +454,9 @@ void forward_region_layer_gpu(const layer l, network net)
|
||||
count += group_size;
|
||||
}
|
||||
} else if (l.softmax) {
|
||||
int index = entry_index(l, 0, 0, 5);
|
||||
int index = entry_index(l, 0, 0, l.coords + !l.background);
|
||||
//printf("%d\n", index);
|
||||
softmax_gpu(net.input_gpu + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
|
||||
softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
|
||||
}
|
||||
if(!net.train || l.onlyforward){
|
||||
cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
|
||||
@ -479,8 +483,8 @@ void backward_region_layer_gpu(const layer l, network net)
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = entry_index(l, b, n*l.w*l.h, 0);
|
||||
gradient_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index);
|
||||
index = entry_index(l, b, n*l.w*l.h, 4);
|
||||
gradient_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index);
|
||||
index = entry_index(l, b, n*l.w*l.h, l.coords);
|
||||
if(!l.background) gradient_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index);
|
||||
}
|
||||
}
|
||||
axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1);
|
||||
|
262
src/segmenter.c
Normal file
262
src/segmenter.c
Normal file
@ -0,0 +1,262 @@
|
||||
#include "network.h"
|
||||
#include "utils.h"
|
||||
#include "parser.h"
|
||||
#include "option_list.h"
|
||||
#include "blas.h"
|
||||
#include "assert.h"
|
||||
#include "cuda.h"
|
||||
#include <sys/time.h>
|
||||
|
||||
void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
|
||||
{
|
||||
int i;
|
||||
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
printf("%d\n", ngpus);
|
||||
network *nets = calloc(ngpus, sizeof(network));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
srand(seed);
|
||||
#ifdef GPU
|
||||
cuda_set_device(gpus[i]);
|
||||
#endif
|
||||
nets[i] = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&nets[i], weightfile);
|
||||
}
|
||||
if(clear) *nets[i].seen = 0;
|
||||
nets[i].learning_rate *= ngpus;
|
||||
}
|
||||
srand(time(0));
|
||||
network net = nets[0];
|
||||
|
||||
int imgs = net.batch * net.subdivisions * ngpus;
|
||||
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
list *options = read_data_cfg(datacfg);
|
||||
|
||||
char *backup_directory = option_find_str(options, "backup", "/backup/");
|
||||
char *train_list = option_find_str(options, "train", "data/train.list");
|
||||
|
||||
list *plist = get_paths(train_list);
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
int N = plist->size;
|
||||
clock_t time;
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.threads = 32;
|
||||
|
||||
args.min = net.min_crop;
|
||||
args.max = net.max_crop;
|
||||
args.angle = net.angle;
|
||||
args.aspect = net.aspect;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
args.size = net.w;
|
||||
args.classes = 80;
|
||||
|
||||
args.paths = paths;
|
||||
args.n = imgs;
|
||||
args.m = N;
|
||||
args.type = SEGMENTATION_DATA;
|
||||
|
||||
data train;
|
||||
data buffer;
|
||||
pthread_t load_thread;
|
||||
args.d = &buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
int epoch = (*net.seen)/N;
|
||||
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
||||
time=clock();
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
|
||||
float loss = 0;
|
||||
#ifdef GPU
|
||||
if(ngpus == 1){
|
||||
loss = train_network(net, train);
|
||||
} else {
|
||||
loss = train_networks(nets, ngpus, train, 4);
|
||||
}
|
||||
#else
|
||||
loss = train_network(net, train);
|
||||
#endif
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
||||
free_data(train);
|
||||
if(*net.seen/N > epoch){
|
||||
epoch = *net.seen/N;
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(get_current_batch(net)%100 == 0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup",backup_directory,base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
}
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.weights", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
|
||||
free_network(net);
|
||||
free_ptrs((void**)paths, plist->size);
|
||||
free_list(plist);
|
||||
free(base);
|
||||
}
|
||||
|
||||
void predict_segmenter(char *datafile, char *cfgfile, char *weightfile, char *filename)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
|
||||
clock_t time;
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input, 0, 0);
|
||||
image sized = letterbox_image(im, net.w, net.h);
|
||||
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
image m = float_to_image(sized.w, sized.h, 80, predictions);
|
||||
image rgb = mask_to_rgb(m);
|
||||
show_image(sized, "orig");
|
||||
show_image(rgb, "pred");
|
||||
cvWaitKey(0);
|
||||
printf("Predicted: %f\n", predictions[0]);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
free_image(im);
|
||||
free_image(sized);
|
||||
free_image(rgb);
|
||||
if (filename) break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Live demo: run the network on frames from a video file or webcam and
 * print the first prediction value and a smoothed FPS estimate.
 * Does nothing when built without OpenCV.
 *
 * NOTE(review): the "Regressor" / "People" strings look copied from the
 * regressor demo; they are left unchanged here.
 *
 * Change from the previous version: the frame time now includes whole
 * seconds (tv_sec), and a zero elapsed time is skipped rather than pushing
 * an infinite value into the FPS average.
 */
void demo_segmenter(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
    printf("Regressor Demo\n");
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);

    srand(2222222);
    CvCapture * cap;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow("Regressor", CV_WINDOW_NORMAL);
    cvResizeWindow("Regressor", 512, 512);
    float fps = 0;

    while(1){
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        image in_s = letterbox_image(in, net.w, net.h);
        show_image(in, "Regressor");

        float *predictions = network_predict(net, in_s.data);

        /* ANSI escapes: clear the screen and move the cursor to the top-left. */
        printf("\033[2J");
        printf("\033[1;1H");
        printf("\nFPS:%.0f\n",fps);

        printf("People: %f\n", predictions[0]);

        free_image(in_s);
        free_image(in);

        cvWaitKey(10);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        /* Total frame time in microseconds, including whole seconds. */
        double elapsed_us = 1000000.0*(double)tval_result.tv_sec + (double)tval_result.tv_usec;
        if(elapsed_us > 0){
            float curr = 1000000.0/elapsed_us;
            fps = .9*fps + .1*curr;
        }
    }
#endif
}
|
||||
|
||||
|
||||
void run_segmenter(int argc, char **argv)
|
||||
{
|
||||
if(argc < 4){
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
|
||||
int *gpus = 0;
|
||||
int gpu = 0;
|
||||
int ngpus = 0;
|
||||
if(gpu_list){
|
||||
printf("%s\n", gpu_list);
|
||||
int len = strlen(gpu_list);
|
||||
ngpus = 1;
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
if (gpu_list[i] == ',') ++ngpus;
|
||||
}
|
||||
gpus = calloc(ngpus, sizeof(int));
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
gpus[i] = atoi(gpu_list);
|
||||
gpu_list = strchr(gpu_list, ',')+1;
|
||||
}
|
||||
} else {
|
||||
gpu = gpu_index;
|
||||
gpus = &gpu;
|
||||
ngpus = 1;
|
||||
}
|
||||
|
||||
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||
int clear = find_arg(argc, argv, "-clear");
|
||||
char *data = argv[3];
|
||||
char *cfg = argv[4];
|
||||
char *weights = (argc > 5) ? argv[5] : 0;
|
||||
char *filename = (argc > 6) ? argv[6]: 0;
|
||||
if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename);
|
||||
else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename);
|
||||
}
|
||||
|
||||
|
@ -70,7 +70,11 @@ void forward_softmax_layer_gpu(const softmax_layer l, network net)
|
||||
count += group_size;
|
||||
}
|
||||
} else {
|
||||
softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
|
||||
if(l.spatial){
|
||||
softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu);
|
||||
}else{
|
||||
softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user