this'll teach me to mess with maxpooling

This commit is contained in:
Joseph Redmon 2018-08-03 15:57:48 -07:00
parent e209b3bbbf
commit b13f67bfdd
23 changed files with 737 additions and 130 deletions

View File

@ -57,8 +57,8 @@ CFLAGS+= -DCUDNN
LDFLAGS+= -lcudnn LDFLAGS+= -lcudnn
endif endif
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o
EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o darknet.o EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o
ifeq ($(GPU), 1) ifeq ($(GPU), 1)
LDFLAGS+= -lstdc++ LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o

View File

@ -24,7 +24,6 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)
while(1){ while(1){
image in = get_image_from_stream(cap); image in = get_image_from_stream(cap);
image in_s = resize_image(in, net->w, net->h); image in_s = resize_image(in, net->w, net->h);
show_image(in, window);
float *p = network_predict(net, in_s.data); float *p = network_predict(net, in_s.data);
@ -45,10 +44,9 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)
} }
printf("]\n"); printf("]\n");
show_image(in, window, 1);
free_image(in_s); free_image(in_s);
free_image(in); free_image(in);
cvWaitKey(1);
} }
#endif #endif
} }

View File

@ -645,6 +645,45 @@ void label_classifier(char *datacfg, char *filename, char *weightfile)
} }
} }
/*
 * Classify every image named in the data config's "test" list and print one
 * tab-separated record per image on stdout: the image path followed by the
 * indexes of its `top` best-scoring classes (the "top" option, default 1).
 * Per-image timing is written to stderr so stdout stays a clean table.
 *
 * datacfg:    data config file (read for the "test" and "top" options)
 * cfgfile:    network definition
 * weightfile: weights passed to load_network
 */
void csv_classifier(char *datacfg, char *cfgfile, char *weightfile)
{
    int i,j;
    network *net = load_network(cfgfile, weightfile, 0);
    srand(time(0));

    list *options = read_data_cfg(datacfg);

    char *test_list = option_find_str(options, "test", "data/test.list");
    int top = option_find_int(options, "top", 1);

    list *plist = get_paths(test_list);

    char **paths = (char **)list_to_array(plist);
    int m = plist->size;
    free_list(plist);

    int *indexes = calloc(top, sizeof(int));

    for(i = 0; i < m; ++i){
        double start = what_time_is_it_now();
        char *path = paths[i];
        image im = load_image_color(path, 0, 0);
        image r = letterbox_image(im, net->w, net->h);
        float *predictions = network_predict(net, r.data);
        if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
        top_k(predictions, net->outputs, top, indexes);

        printf("%s", path);
        for(j = 0; j < top; ++j){
            printf("\t%d", indexes[j]);
        }
        printf("\n");

        free_image(im);
        free_image(r);

        fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - start, i+1, m);
    }

    // Release what this function allocated; previously all of it leaked.
    free(indexes);
    // NOTE(review): assumes free_list() above released only the list nodes,
    // leaving the path strings and the array owned by this function.
    for(i = 0; i < m; ++i){
        free(paths[i]);
    }
    free(paths);
}
void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer)
{ {
@ -869,8 +908,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
} }
if(1){ if(1){
show_image(out, "Threat"); show_image(out, "Threat", 10);
cvWaitKey(10);
} }
free_image(in_s); free_image(in_s);
free_image(in); free_image(in);
@ -922,7 +960,6 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
image in = get_image_from_stream(cap); image in = get_image_from_stream(cap);
image in_s = resize_image(in, net->w, net->h); image in_s = resize_image(in, net->w, net->h);
show_image(in, "Threat Detection");
float *predictions = network_predict(net, in_s.data); float *predictions = network_predict(net, in_s.data);
top_predictions(net, top, indexes); top_predictions(net, top, indexes);
@ -947,11 +984,10 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
} }
} }
show_image(in, "Threat Detection", 10);
free_image(in_s); free_image(in_s);
free_image(in); free_image(in);
cvWaitKey(10);
gettimeofday(&tval_after, NULL); gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result); timersub(&tval_after, &tval_before, &tval_result);
float curr = 1000000.f/((long int)tval_result.tv_usec); float curr = 1000000.f/((long int)tval_result.tv_usec);
@ -1036,12 +1072,10 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
free_image(label); free_image(label);
} }
show_image(in, base); show_image(in, base, 10);
free_image(in_s); free_image(in_s);
free_image(in); free_image(in);
cvWaitKey(10);
gettimeofday(&tval_after, NULL); gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result); timersub(&tval_after, &tval_before, &tval_result);
float curr = 1000000.f/((long int)tval_result.tv_usec); float curr = 1000000.f/((long int)tval_result.tv_usec);
@ -1080,6 +1114,7 @@ void run_classifier(int argc, char **argv)
else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights);
else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights);
else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights);

View File

@ -325,14 +325,10 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80); draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80);
save_image(im, "prediction"); save_image(im, "prediction");
show_image(im, "predictions"); show_image(im, "predictions", 0);
free_detections(dets, nboxes); free_detections(dets, nboxes);
free_image(im); free_image(im);
free_image(sized); free_image(sized);
#ifdef OPENCV
cvWaitKey(0);
cvDestroyAllWindows();
#endif
if (filename) break; if (filename) break;
} }
} }

View File

@ -14,6 +14,7 @@ extern void run_nightmare(int argc, char **argv);
extern void run_classifier(int argc, char **argv); extern void run_classifier(int argc, char **argv);
extern void run_regressor(int argc, char **argv); extern void run_regressor(int argc, char **argv);
extern void run_segmenter(int argc, char **argv); extern void run_segmenter(int argc, char **argv);
extern void run_isegmenter(int argc, char **argv);
extern void run_char_rnn(int argc, char **argv); extern void run_char_rnn(int argc, char **argv);
extern void run_tag(int argc, char **argv); extern void run_tag(int argc, char **argv);
extern void run_cifar(int argc, char **argv); extern void run_cifar(int argc, char **argv);
@ -452,6 +453,8 @@ int main(int argc, char **argv)
run_classifier(argc, argv); run_classifier(argc, argv);
} else if (0 == strcmp(argv[1], "regressor")){ } else if (0 == strcmp(argv[1], "regressor")){
run_regressor(argc, argv); run_regressor(argc, argv);
} else if (0 == strcmp(argv[1], "isegmenter")){
run_isegmenter(argc, argv);
} else if (0 == strcmp(argv[1], "segmenter")){ } else if (0 == strcmp(argv[1], "segmenter")){
run_segmenter(argc, argv); run_segmenter(argc, argv);
} else if (0 == strcmp(argv[1], "art")){ } else if (0 == strcmp(argv[1], "art")){

View File

@ -613,9 +613,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
if(fullscreen){ if(fullscreen){
cvSetWindowProperty("predictions", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); cvSetWindowProperty("predictions", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
} }
show_image(im, "predictions"); show_image(im, "predictions", 0);
cvWaitKey(0);
cvDestroyAllWindows();
#endif #endif
} }

View File

@ -0,0 +1,265 @@
#include "darknet.h"
#include <sys/time.h>
#include <assert.h>
/*
 * Train an instance-segmentation network.
 *
 * One network is loaded per entry in `gpus`; nets[0] is the one that is
 * displayed, timed and saved. Batches are prefetched by a loader thread while
 * the previous batch trains. Weights are written to the "backup" directory of
 * the data config at every epoch boundary, every 100 batches (as
 * "<base>.backup") and once more when training ends.
 *
 * datacfg:    data config (read for the "backup" and "train" options)
 * cfgfile:    network definition; its base name prefixes the saved files
 * weightfile: initial weights passed to load_network
 * gpus/ngpus: device ids and their count
 * clear:      forwarded to load_network
 * display:    non-zero to show input, prediction and truth masks each batch
 */
void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display)
{
    int i;

    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
    network **nets = calloc(ngpus, sizeof(network*));

    srand(time(0));
    int seed = rand();
    for(i = 0; i < ngpus; ++i){
        // Same seed before every load, so each replica sees the same random stream.
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = load_network(cfgfile, weightfile, clear);
        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
    network *net = nets[0];
    image pred = get_network_image(net);

    // The output map must be an exact integer down-scaling of the input.
    int div = net->w/pred.w;
    assert(pred.w * div == net->w);
    assert(pred.h * div == net->h);

    int imgs = net->batch * net->subdivisions * ngpus;

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
    char *train_list = option_find_str(options, "train", "data/train.list");

    list *plist = get_paths(train_list);
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;

    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.threads = 32;
    args.scale = div;
    args.num_boxes = 90;

    args.min = net->min_crop;
    args.max = net->max_crop;
    args.angle = net->angle;
    args.aspect = net->aspect;
    args.exposure = net->exposure;
    args.saturation = net->saturation;
    args.hue = net->hue;
    args.size = net->w;
    args.classes = 80;

    args.paths = paths;
    args.n = imgs;
    args.m = N;
    args.type = ISEG_DATA;

    data train;
    data buffer;
    pthread_t load_thread;
    args.d = &buffer;
    load_thread = load_data(args);

    int epoch = (*net->seen)/N;
    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
        double time = what_time_is_it_now();

        // Take the prefetched batch and immediately start loading the next one.
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
        time = what_time_is_it_now();

        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        if(display){
            // tr and im are built with float_to_image over rows of the batch,
            // so only the two mask_to_rgb results are freed here.
            image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]);
            image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
            pred.c = 80;
            image mask = mask_to_rgb(tr);
            image prmask = mask_to_rgb(pred);
            show_image(im, "input", 1);
            show_image(prmask, "pred", 1);
            show_image(mask, "truth", 100);
            free_image(mask);
            free_image(prmask);
        }
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
        free_data(train);
        if(*net->seen/N > epoch){
            epoch = *net->seen/N;
            char buff[256];
            // snprintf: a long backup path must truncate, not overrun buff.
            snprintf(buff, sizeof(buff), "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
        }
        if(get_current_batch(net)%100 == 0){
            char buff[256];
            snprintf(buff, sizeof(buff), "%s/%s.backup",backup_directory,base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    snprintf(buff, sizeof(buff), "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);

    // One prefetch is always still in flight here. Wait for it and drop its
    // batch before `paths` is released, otherwise the loader thread may read
    // freed memory and the batch it produced is leaked.
    pthread_join(load_thread, 0);
    free_data(buffer);

    free_network(net);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
    free(nets);
}
/*
 * Run the segmentation network on single images and display the result.
 *
 * When `filename` is given, that one image is processed and the function
 * returns. Otherwise image paths are read from stdin, one per line, until
 * end of input. For each image the letterboxed input is shown in window
 * "orig" and the colourised network output in window "pred".
 *
 * datafile: unused in this function
 * cfg:      network definition
 * weights:  weights passed to load_network
 * filename: image to process, or NULL to prompt on stdin
 */
void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename)
{
    network *net = load_network(cfg, weights, 0);
    set_batch_network(net, 1);
    srand(2222222);

    clock_t time;
    char buff[256];
    char *input = buff;
    while(1){
        if(filename){
            // snprintf always NUL-terminates; strncpy left buff unterminated
            // when filename was 256 characters or longer.
            snprintf(buff, sizeof(buff), "%s", filename);
        }else{
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;
            // Cut the line at its trailing newline.
            strtok(input, "\n");
        }
        image im = load_image_color(input, 0, 0);
        image sized = letterbox_image(im, net->w, net->h);

        float *X = sized.data;
        time=clock();
        float *predictions = network_predict(net, X);
        image pred = get_network_image(net);
        image prmask = mask_to_rgb(pred);
        printf("Predicted: %f\n", predictions[0]);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        show_image(sized, "orig", 1);
        show_image(prmask, "pred", 0);
        free_image(im);
        free_image(sized);
        free_image(prmask);
        if (filename) break;
    }
}
/*
 * Live segmentation demo (compiled only when OPENCV is defined; otherwise
 * the function body is empty).
 *
 * Frames are read from `filename` when given, else from camera `cam_index`.
 * Each frame is letterboxed to the network size, run through the network,
 * and the colourised output is shown in the "Segmenter" window. A smoothed
 * frames-per-second figure is printed every frame.
 *
 * datacfg: unused in this function
 */
void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename)
{
#ifdef OPENCV
    printf("Classifier Demo\n");
    network *net = load_network(cfg, weights, 0);
    set_batch_network(net, 1);

    srand(2222222);
    CvCapture * cap;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow("Segmenter", CV_WINDOW_NORMAL);
    cvResizeWindow("Segmenter", 512, 512);
    float fps = 0;

    while(1){
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        // NOTE(review): assumes an exhausted or failed stream yields an image
        // with no pixel data; stop instead of feeding it to the network.
        if(!in.data) break;
        image in_s = letterbox_image(in, net->w, net->h);

        network_predict(net, in_s.data);

        // ANSI escapes: clear the terminal and move the cursor to the top-left.
        printf("\033[2J");
        printf("\033[1;1H");
        printf("\nFPS:%.0f\n",fps);

        image pred = get_network_image(net);
        image prmask = mask_to_rgb(pred);
        show_image(prmask, "Segmenter", 10);

        free_image(in_s);
        free_image(in);
        free_image(prmask);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        // Use the whole elapsed time. tv_usec alone wraps every second and
        // can be zero, which previously gave a wrong rate or a division by 0.
        long int elapsed_us = (long int)tval_result.tv_sec*1000000L + (long int)tval_result.tv_usec;
        float curr = (elapsed_us > 0) ? 1000000.f/elapsed_us : fps;
        // Exponential moving average of the per-frame rate.
        fps = .9*fps + .1*curr;
    }
#endif
}
/*
 * Command-line entry point for the instance segmenter.
 *
 * Expected arguments: argv[2] = mode ("train", "test" or "demo"),
 * argv[3] = data config, argv[4] = network cfg, argv[5] = weights (optional),
 * argv[6] = input file (optional).
 * Recognised flags: -gpus <comma-separated ids>, -c <camera index>,
 * -clear, -display.
 */
void run_isegmenter(int argc, char **argv)
{
    // Both the data config (argv[3]) and the network cfg (argv[4]) are
    // required; the old `argc < 4` check let a NULL cfg through.
    if(argc < 5){
        fprintf(stderr, "usage: %s %s [train/test/demo] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    int *gpus = 0;
    int *gpus_heap = 0;   // set only when the id list is heap-allocated here
    int gpu = 0;
    int ngpus = 0;
    if(gpu_list){
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        ngpus = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus_heap = calloc(ngpus, sizeof(int));
        gpus = gpus_heap;
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            // After the last id there is no comma; stop rather than compute
            // NULL+1 as the original did.
            char *comma = strchr(gpu_list, ',');
            if(!comma) break;
            gpu_list = comma + 1;
        }
    } else {
        // No -gpus flag: use the single globally selected device.
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int clear = find_arg(argc, argv, "-clear");
    int display = find_arg(argc, argv, "-display");
    char *data = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename);
    else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display);
    else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename);

    // free(NULL) is a no-op, so this is safe when -gpus was not given.
    free(gpus_heap);
}

View File

@ -460,13 +460,9 @@ void inter_dcgan(char *cfgfile, char *weightfile)
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
//char buff[256]; //char buff[256];
sprintf(buff, "out%05d", c); sprintf(buff, "out%05d", c);
show_image(out, "out");
save_image(out, "out"); save_image(out, "out");
save_image(out, buff); save_image(out, buff);
#ifdef OPENCV show_image(out, "out", 0);
//cvWaitKey(0);
#endif
} }
} }
@ -499,11 +495,8 @@ void test_dcgan(char *cfgfile, char *weightfile)
//yuv_to_rgb(out); //yuv_to_rgb(out);
normalize_image(out); normalize_image(out);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
show_image(out, "out");
save_image(out, "out"); save_image(out, "out");
#ifdef OPENCV show_image(out, "out", 0);
cvWaitKey(0);
#endif
free_image(im); free_image(im);
} }
@ -639,11 +632,10 @@ void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, i
if(display){ if(display){
image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
show_image(im, "gen"); show_image(im, "gen", 1);
show_image(im2, "train"); show_image(im2, "train", 1);
save_image(im, "gen"); save_image(im, "gen");
save_image(im2, "train"); save_image(im2, "train");
cvWaitKey(1);
} }
#endif #endif
@ -826,11 +818,10 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
if(display){ if(display){
image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
show_image(im, "gen"); show_image(im, "gen", 1);
show_image(im2, "train"); show_image(im2, "train", 1);
save_image(im, "gen"); save_image(im, "gen");
save_image(im2, "train"); save_image(im2, "train");
cvWaitKey(1);
} }
#endif #endif
@ -1010,9 +1001,8 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
if(display){ if(display){
image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]); image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]);
image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
show_image(im, "gen"); show_image(im, "gen", 1);
show_image(im2, "train"); show_image(im2, "train", 1);
cvWaitKey(1);
} }
#endif #endif
free_data(merge); free_data(merge);
@ -1342,12 +1332,9 @@ void test_lsd(char *cfg, char *weights, char *filename, int gray)
//yuv_to_rgb(out); //yuv_to_rgb(out);
constrain_image(out); constrain_image(out);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
show_image(out, "out");
show_image(crop, "crop");
save_image(out, "out"); save_image(out, "out");
#ifdef OPENCV show_image(out, "out", 1);
cvWaitKey(0); show_image(crop, "crop", 0);
#endif
free_image(im); free_image(im);
free_image(resized); free_image(resized);

View File

@ -376,10 +376,7 @@ void run_nightmare(int argc, char **argv)
if(reconstruct){ if(reconstruct){
reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
//if ((n+1)%30 == 0) rate *= .5; //if ((n+1)%30 == 0) rate *= .5;
show_image(im, "reconstruction"); show_image(im, "reconstruction", 10);
#ifdef OPENCV
cvWaitKey(10);
#endif
}else{ }else{
int layer = max_layer + rand()%range - range/2; int layer = max_layer + rand()%range - range/2;
int octave = rand()%octaves; int octave = rand()%octaves;
@ -400,8 +397,7 @@ void run_nightmare(int argc, char **argv)
} }
printf("%d %s\n", e, buff); printf("%d %s\n", e, buff);
save_image(im, buff); save_image(im, buff);
//show_image(im, buff); //show_image(im, buff, 0);
//cvWaitKey(0);
if(rotate){ if(rotate){
image rot = rotate_image(im, rotate); image rot = rotate_image(im, rotate);

View File

@ -179,7 +179,6 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
image in = get_image_from_stream(cap); image in = get_image_from_stream(cap);
image crop = center_crop_image(in, net->w, net->h); image crop = center_crop_image(in, net->w, net->h);
grayscale_image_3c(crop); grayscale_image_3c(crop);
show_image(crop, "Regressor");
float *predictions = network_predict(net, crop.data); float *predictions = network_predict(net, crop.data);
@ -192,11 +191,10 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
printf("%s: %f\n", names[i], predictions[i]); printf("%s: %f\n", names[i], predictions[i]);
} }
show_image(crop, "Regressor", 10);
free_image(in); free_image(in);
free_image(crop); free_image(crop);
cvWaitKey(10);
gettimeofday(&tval_after, NULL); gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result); timersub(&tval_after, &tval_before, &tval_result);
float curr = 1000000.f/((long int)tval_result.tv_usec); float curr = 1000000.f/((long int)tval_result.tv_usec);

View File

@ -42,7 +42,6 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
char **paths = (char **)list_to_array(plist); char **paths = (char **)list_to_array(plist);
printf("%d\n", plist->size); printf("%d\n", plist->size);
int N = plist->size; int N = plist->size;
clock_t time;
load_args args = {0}; load_args args = {0};
args.w = net->w; args.w = net->w;
@ -73,14 +72,14 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
int epoch = (*net->seen)/N; int epoch = (*net->seen)/N;
while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
time=clock(); double time = what_time_is_it_now();
pthread_join(load_thread, 0); pthread_join(load_thread, 0);
train = buffer; train = buffer;
load_thread = load_data(args); load_thread = load_data(args);
printf("Loaded: %lf seconds\n", sec(clock()-time)); printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
time=clock(); time = what_time_is_it_now();
float loss = 0; float loss = 0;
#ifdef GPU #ifdef GPU
@ -97,18 +96,15 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
image mask = mask_to_rgb(tr); image mask = mask_to_rgb(tr);
image prmask = mask_to_rgb(pred); image prmask = mask_to_rgb(pred);
show_image(im, "input"); show_image(im, "input", 1);
show_image(prmask, "pred"); show_image(prmask, "pred", 1);
show_image(mask, "truth"); show_image(mask, "truth", 100);
#ifdef OPENCV
cvWaitKey(100);
#endif
free_image(mask); free_image(mask);
free_image(prmask); free_image(prmask);
} }
if(avg_loss == -1) avg_loss = loss; if(avg_loss == -1) avg_loss = loss;
avg_loss = avg_loss*.9 + loss*.1; avg_loss = avg_loss*.9 + loss*.1;
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
free_data(train); free_data(train);
if(*net->seen/N > epoch){ if(*net->seen/N > epoch){
epoch = *net->seen/N; epoch = *net->seen/N;
@ -159,13 +155,10 @@ void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename)
float *predictions = network_predict(net, X); float *predictions = network_predict(net, X);
image pred = get_network_image(net); image pred = get_network_image(net);
image prmask = mask_to_rgb(pred); image prmask = mask_to_rgb(pred);
show_image(sized, "orig");
show_image(prmask, "pred");
#ifdef OPENCV
cvWaitKey(0);
#endif
printf("Predicted: %f\n", predictions[0]); printf("Predicted: %f\n", predictions[0]);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
show_image(sized, "orig", 1);
show_image(prmask, "pred", 0);
free_image(im); free_image(im);
free_image(sized); free_image(sized);
free_image(prmask); free_image(prmask);
@ -210,14 +203,12 @@ void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, cons
image pred = get_network_image(net); image pred = get_network_image(net);
image prmask = mask_to_rgb(pred); image prmask = mask_to_rgb(pred);
show_image(prmask, "Segmenter"); show_image(prmask, "Segmenter", 10);
free_image(in_s); free_image(in_s);
free_image(in); free_image(in);
free_image(prmask); free_image(prmask);
cvWaitKey(10);
gettimeofday(&tval_after, NULL); gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result); timersub(&tval_after, &tval_before, &tval_result);
float curr = 1000000.f/((long int)tval_result.tv_usec); float curr = 1000000.f/((long int)tval_result.tv_usec);

View File

@ -93,7 +93,7 @@ void test_super(char *cfgfile, char *weightfile, char *filename)
image out = get_network_image(net); image out = get_network_image(net);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
save_image(out, "out"); save_image(out, "out");
show_image(out, "out"); show_image(out, "out", 0);
free_image(im); free_image(im);
if (filename) break; if (filename) break;

View File

@ -296,14 +296,10 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20); draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20);
save_image(im, "predictions"); save_image(im, "predictions");
show_image(im, "predictions"); show_image(im, "predictions", 0);
free_detections(dets, nboxes); free_detections(dets, nboxes);
free_image(im); free_image(im);
free_image(sized); free_image(sized);
#ifdef OPENCV
cvWaitKey(0);
cvDestroyAllWindows();
#endif
if (filename) break; if (filename) break;
} }
} }

View File

@ -86,6 +86,7 @@ typedef enum {
XNOR, XNOR,
REGION, REGION,
YOLO, YOLO,
ISEG,
REORG, REORG,
UPSAMPLE, UPSAMPLE,
LOGXENT, LOGXENT,
@ -166,6 +167,7 @@ struct layer{
float ratio; float ratio;
float learning_rate_scale; float learning_rate_scale;
float clip; float clip;
int noloss;
int softmax; int softmax;
int classes; int classes;
int coords; int coords;
@ -203,6 +205,7 @@ struct layer{
int dontload; int dontload;
int dontsave; int dontsave;
int dontloadscales; int dontloadscales;
int numload;
float temperature; float temperature;
float probability; float probability;
@ -213,6 +216,8 @@ struct layer{
int * input_layers; int * input_layers;
int * input_sizes; int * input_sizes;
int * map; int * map;
int * counts;
float ** sums;
float * rand; float * rand;
float * cost; float * cost;
float * state; float * state;
@ -540,7 +545,7 @@ typedef struct{
} data; } data;
typedef enum { typedef enum {
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA
} data_type; } data_type;
typedef struct load_args{ typedef struct load_args{
@ -705,7 +710,7 @@ int resize_network(network *net, int w, int h);
void free_matrix(matrix m); void free_matrix(matrix m);
void test_resize(char *filename); void test_resize(char *filename);
void save_image(image p, const char *name); void save_image(image p, const char *name);
void show_image(image p, const char *name); int show_image(image p, const char *name, int ms);
image copy_image(image p); image copy_image(image p);
void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
float get_current_rate(network *net); float get_current_rate(network *net);

View File

@ -151,7 +151,7 @@ void cudnn_convolutional_setup(layer *l)
l->convDesc, l->convDesc,
l->dstTensorDesc, l->dstTensorDesc,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
4000000000, 2000000000,
&l->fw_algo); &l->fw_algo);
cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
l->weightDesc, l->weightDesc,
@ -159,7 +159,7 @@ void cudnn_convolutional_setup(layer *l)
l->convDesc, l->convDesc,
l->dsrcTensorDesc, l->dsrcTensorDesc,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
4000000000, 2000000000,
&l->bd_algo); &l->bd_algo);
cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
l->srcTensorDesc, l->srcTensorDesc,
@ -167,7 +167,7 @@ void cudnn_convolutional_setup(layer *l)
l->convDesc, l->convDesc,
l->dweightDesc, l->dweightDesc,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
4000000000, 2000000000,
&l->bf_algo); &l->bf_algo);
} }
#endif #endif

View File

@ -361,6 +361,44 @@ box bound_image(image im)
} }
/*
 * Fill `truth` with up to `num_boxes` instance masks for the image at `path`.
 *
 * The label file name is derived from `path` by replacing "images" or
 * "JPEGImages" with "mask" and the image extension with ".txt". Each record
 * in that file is "<id> <rle>": an integer id followed by one
 * whitespace-free token that read_intlist turns into a run-length list.
 *
 * Every mask is decoded at the original image size (w x h), given the same
 * rotate/crop (`aug`) and optional horizontal `flip` as the image, resized
 * to mw x mh, and stored as one record of (mw*mh + 1) floats: the id
 * followed by the mask values. If fewer than `num_boxes` records are
 * written, the id slot of the next record is set to -1 as a terminator.
 *
 * classes: unused in this function
 */
void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh)
{
    char labelpath[4096];
    find_replace(path, "images", "mask", labelpath);
    find_replace(labelpath, "JPEGImages", "mask", labelpath);
    find_replace(labelpath, ".jpg", ".txt", labelpath);
    find_replace(labelpath, ".JPG", ".txt", labelpath);
    find_replace(labelpath, ".JPEG", ".txt", labelpath);
    FILE *file = fopen(labelpath, "r");
    if(!file) file_error(labelpath);
    char buff[32788];
    int id;
    int i = 0;
    int j;
    const int stride = mw*mh + 1;   // floats per record: id + mask values
    image part = make_image(w, h, 1);
    // The capacity check comes first so no record is read once truth is full.
    // The field width (sizeof buff - 1) stops an over-long token from
    // overrunning buff; such a token is truncated instead.
    while(i < num_boxes && fscanf(file, "%d %32787s", &id, buff) == 2){
        int n = 0;
        int *rle = read_intlist(buff, &n, 0);
        load_rle(part, rle, n);
        image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect);
        if(flip) flip_image(sized);

        image mask = resize_image(sized, mw, mh);
        truth[i*stride] = id;
        for(j = 0; j < mw*mh; ++j){
            truth[i*stride + 1 + j] = mask.data[j];
        }
        ++i;

        free_image(mask);
        free_image(sized);
        free(rle);
    }
    if(i < num_boxes) truth[i*stride] = -1;
    fclose(file);
    free_image(part);
}
void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh)
{ {
char labelpath[4096]; char labelpath[4096];
find_replace(path, "images", "mask", labelpath); find_replace(path, "images", "mask", labelpath);
@ -743,7 +781,47 @@ data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int mi
return d; return d;
} }
data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int div, int min, int max, float angle, float aspect, float hue, float saturation, float exposure)
{
char **random_paths = get_random_paths(paths, n, m);
int i;
data d = {0};
d.shallow = 0;
d.X.rows = n;
d.X.vals = calloc(d.X.rows, sizeof(float*));
d.X.cols = h*w*3;
d.y = make_matrix(n, (((w/div)*(h/div))+1)*boxes);
for(i = 0; i < n; ++i){
image orig = load_image_color(random_paths[i], 0, 0);
augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h);
image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect);
int flip = rand()%2;
if(flip) flip_image(sized);
random_distort_image(sized, hue, saturation, exposure);
d.X.vals[i] = sized.data;
//show_image(sized, "image");
fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, w/div, h/div);
free_image(orig);
/*
image rgb = mask_to_rgb(sized_m, classes);
show_image(rgb, "part");
show_image(sized, "orig");
cvWaitKey(0);
free_image(rgb);
*/
}
free(random_paths);
return d;
}
data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure)
{ {
char **random_paths = get_random_paths(paths, n, m); char **random_paths = get_random_paths(paths, n, m);
int i; int i;
@ -767,7 +845,7 @@ data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int b
d.X.vals[i] = sized.data; d.X.vals[i] = sized.data;
//show_image(sized, "image"); //show_image(sized, "image");
fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14); fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14);
free_image(orig); free_image(orig);
@ -975,7 +1053,8 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
float dh = jitter * orig.h; float dh = jitter * orig.h;
float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh)); float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh));
float scale = rand_uniform(.25, 2); //float scale = rand_uniform(.25, 2);
float scale = 1;
float nw, nh; float nw, nh;
@ -1025,8 +1104,10 @@ void *load_thread(void *ptr)
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == WRITING_DATA){ } else if (a.type == WRITING_DATA){
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
} else if (a.type == ISEG_DATA){
*a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
} else if (a.type == INSTANCE_DATA){ } else if (a.type == INSTANCE_DATA){
*a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
} else if (a.type == SEGMENTATION_DATA){ } else if (a.type == SEGMENTATION_DATA){
*a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale); *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale);
} else if (a.type == REGION_DATA){ } else if (a.type == REGION_DATA){
@ -1212,7 +1293,7 @@ data *tile_data(data orig, int divs, int size)
{ {
data *ds = calloc(divs*divs, sizeof(data)); data *ds = calloc(divs*divs, sizeof(data));
int i, j; int i, j;
#pragma omp parallel for #pragma omp parallel for
for(i = 0; i < divs*divs; ++i){ for(i = 0; i < divs*divs; ++i){
data d; data d;
d.shallow = 0; d.shallow = 0;
@ -1223,7 +1304,7 @@ data *tile_data(data orig, int divs, int size)
d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.vals = calloc(d.X.rows, sizeof(float*));
d.y = copy_matrix(orig.y); d.y = copy_matrix(orig.y);
#pragma omp parallel for #pragma omp parallel for
for(j = 0; j < orig.X.rows; ++j){ for(j = 0; j < orig.X.rows; ++j){
int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2; int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2;
int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2; int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2;
@ -1247,7 +1328,7 @@ data resize_data(data orig, int w, int h)
d.X.vals = calloc(d.X.rows, sizeof(float*)); d.X.vals = calloc(d.X.rows, sizeof(float*));
d.y = copy_matrix(orig.y); d.y = copy_matrix(orig.y);
#pragma omp parallel for #pragma omp parallel for
for(i = 0; i < orig.X.rows; ++i){ for(i = 0; i < orig.X.rows; ++i){
image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]);
d.X.vals[i] = resize_image(im, w, h).data; d.X.vals[i] = resize_image(im, w, h).data;

View File

@ -572,7 +572,7 @@ void show_image_cv(image p, const char *name, IplImage *disp)
} }
#endif #endif
void show_image(image p, const char *name) int show_image(image p, const char *name, int ms)
{ {
#ifdef OPENCV #ifdef OPENCV
IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c); IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
@ -581,9 +581,13 @@ void show_image(image p, const char *name)
show_image_cv(copy, name, disp); show_image_cv(copy, name, disp);
free_image(copy); free_image(copy);
cvReleaseImage(&disp); cvReleaseImage(&disp);
int c = cvWaitKey(ms);
if (c != -1) c = c%256;
return c;
#else #else
fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
save_image(p, name); save_image(p, name);
return 0;
#endif #endif
} }
@ -727,7 +731,7 @@ void show_image_layers(image p, char *name)
for(i = 0; i < p.c; ++i){ for(i = 0; i < p.c; ++i){
sprintf(buff, "%s - Layer %d", name, i); sprintf(buff, "%s - Layer %d", name, i);
image layer = get_image_layer(p, i); image layer = get_image_layer(p, i);
show_image(layer, buff); show_image(layer, buff, 1);
free_image(layer); free_image(layer);
} }
} }
@ -735,7 +739,7 @@ void show_image_layers(image p, char *name)
void show_image_collapsed(image p, char *name) void show_image_collapsed(image p, char *name)
{ {
image c = collapse_image_layers(p, 1); image c = collapse_image_layers(p, 1);
show_image(c, name); show_image(c, name, 1);
free_image(c); free_image(c);
} }
@ -1406,16 +1410,16 @@ void test_resize(char *filename)
distort_image(c4, .1, .66666, 1.5); distort_image(c4, .1, .66666, 1.5);
show_image(im, "Original"); show_image(im, "Original", 1);
show_image(gray, "Gray"); show_image(gray, "Gray", 1);
show_image(c1, "C1"); show_image(c1, "C1", 1);
show_image(c2, "C2"); show_image(c2, "C2", 1);
show_image(c3, "C3"); show_image(c3, "C3", 1);
show_image(c4, "C4"); show_image(c4, "C4", 1);
#ifdef OPENCV #ifdef OPENCV
while(1){ while(1){
image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320);
show_image(aug, "aug"); show_image(aug, "aug", 1);
free_image(aug); free_image(aug);
@ -1430,7 +1434,7 @@ void test_resize(char *filename)
float dhue = rand_uniform(-hue, hue); float dhue = rand_uniform(-hue, hue);
distort_image(c, dhue, dsat, dexp); distort_image(c, dhue, dsat, dexp);
show_image(c, "rand"); show_image(c, "rand", 1);
printf("%f %f %f\n", dhue, dsat, dexp); printf("%f %f %f\n", dhue, dsat, dexp);
free_image(c); free_image(c);
cvWaitKey(0); cvWaitKey(0);
@ -1585,7 +1589,7 @@ void show_image_normalized(image im, const char *name)
{ {
image c = copy_image(im); image c = copy_image(im);
normalize_image(c); normalize_image(c);
show_image(c, name); show_image(c, name, 1);
free_image(c); free_image(c);
} }
@ -1603,7 +1607,7 @@ void show_images(image *ims, int n, char *window)
*/ */
normalize_image(m); normalize_image(m);
save_image(m, window); save_image(m, window);
show_image(m, window); show_image(m, window, 1);
free_image(m); free_image(m);
} }

219
src/iseg_layer.c Normal file
View File

@ -0,0 +1,219 @@
#include "iseg_layer.h"
#include "activations.h"
#include "blas.h"
#include "box.h"
#include "cuda.h"
#include "utils.h"
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
/*
 * Build an ISEG layer for a w x h map with `classes` class channels followed
 * by `ids` embedding channels.  Input and output have the same shape.
 *
 * The truth buffer is sized for 90 instances per image, each stored as one
 * id value plus a w*h mask.  Per-instance pixel counts and embedding sums
 * are allocated here; the sum vectors are only allocated when ids != 0.
 */
layer make_iseg_layer(int batch, int w, int h, int classes, int ids)
{
    enum { MAX_INSTANCES = 90 };
    const int channels = classes + ids;
    int inst;

    layer l = {0};
    l.type = ISEG;

    l.h = h;
    l.w = w;
    l.c = channels;
    l.out_w = w;
    l.out_h = h;
    l.out_c = channels;
    l.classes = classes;
    l.batch = batch;
    l.extra = ids;
    l.cost = calloc(1, sizeof(float));
    l.outputs = h*w*channels;
    l.inputs = l.outputs;
    l.truths = MAX_INSTANCES*(w*h+1);
    l.delta = calloc(batch*l.outputs, sizeof(float));
    l.output = calloc(batch*l.outputs, sizeof(float));

    l.counts = calloc(MAX_INSTANCES, sizeof(int));
    l.sums = calloc(MAX_INSTANCES, sizeof(float*));
    if(ids != 0){
        for(inst = 0; inst < MAX_INSTANCES; ++inst){
            l.sums[inst] = calloc(ids, sizeof(float));
        }
    }

    l.forward = forward_iseg_layer;
    l.backward = backward_iseg_layer;
#ifdef GPU
    l.forward_gpu = forward_iseg_layer_gpu;
    l.backward_gpu = backward_iseg_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
#endif

    fprintf(stderr, "iseg\n");
    /* NOTE(review): this reseeds the global rand() state every time a layer is built. */
    srand(0);

    return l;
}
/*
 * Resize the layer to a new w x h map, keeping the channel count.
 *
 * Besides the input/output sizes and buffers, the output dimensions and the
 * truth size are refreshed: make_iseg_layer() derives all of them from w and
 * h, and forward_iseg_layer() uses l.truths as the per-image stride together
 * with a per-instance stride of w*h + 1, so they must stay in step.
 */
void resize_iseg_layer(layer *l, int w, int h)
{
    l->w = w;
    l->h = h;
    l->out_w = w;
    l->out_h = h;

    l->outputs = h*w*l->c;
    l->inputs = l->outputs;
    /* Same layout as in make_iseg_layer(): 90 instances of (id + w*h mask). */
    l->truths = 90*(w*h+1);

    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
    l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));

#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);

    l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
    l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
#endif
}
/*
 * Forward pass / loss for the instance-segmentation layer (CPU).
 *
 * Output layout per image: l.classes class planes followed by l.extra
 * embedding planes, each l.w*l.h values.  Truth layout per image: up to 90
 * instances of (class id, l.w*l.h mask values); a negative id ends the list.
 *
 * For every image the function
 *   1. sets a default delta pulling class outputs to 0 and embeddings
 *      towards 0 (scaled by .1),
 *   2. for each instance, overwrites the class delta on its mask pixels and
 *      accumulates the embeddings of those pixels,
 *   3. averages the accumulated embeddings per instance (printing them for
 *      the first image when net.gpu_index == 0),
 *   4. adds a +/-.1 step per embedding value: towards the mean of the
 *      pixel's own instance, away from the mean of every other instance.
 * The cost is the squared magnitude of the whole delta buffer.
 */
void forward_iseg_layer(const layer l, network net)
{
    double start = what_time_is_it_now();
    int i,b,j,k;
    int ids = l.extra;
    memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
    memset(l.delta, 0, l.outputs * l.batch * sizeof(float));

#ifndef GPU
    /* In GPU builds forward_iseg_layer_gpu() has already applied this. */
    for (b = 0; b < l.batch; ++b){
        int index = b*l.outputs;
        activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC);
    }
#endif

    for (b = 0; b < l.batch; ++b){
        // a priori, each pixel has no class
        for(i = 0; i < l.classes; ++i){
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + i*l.w*l.h + k;
                l.delta[index] = 0 - l.output[index];
            }
        }

        // a priori, embedding should be small magnitude
        for(i = 0; i < ids; ++i){
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + (i+l.classes)*l.w*l.h + k;
                l.delta[index] = .1 * (0 - l.output[index]);
            }
        }

        /* counts is an int array (see make_iseg_layer), so size it as such. */
        memset(l.counts, 0, 90*sizeof(int));
        for(i = 0; i < 90; ++i){
            fill_cpu(ids, 0, l.sums[i], 1);

            int c = net.truth[b*l.truths + i*(l.w*l.h+1)];
            if(c < 0) break;
            /* An id outside the class planes would index embedding planes or
             * past the buffer; leave such an instance with a zero count so
             * the later passes skip it. */
            if(c >= l.classes) continue;
            // add up metric embeddings for each instance
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + c*l.w*l.h + k;
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    l.delta[index] = v - l.output[index];
                    /* Stride l.w*l.h walks pixel k across the embedding planes. */
                    axpy_cpu(ids, 1, l.output + b*l.outputs + l.classes*l.w*l.h + k, l.w*l.h, l.sums[i], 1);
                    ++l.counts[i];
                }
            }
        }

        /* Per-instance spread of the embeddings around their mean; only
         * used for the diagnostic print below. */
        float *mse = calloc(90, sizeof(float));
        for(i = 0; i < 90; ++i){
            int c = net.truth[b*l.truths + i*(l.w*l.h+1)];
            if(c < 0) break;
            if(!l.counts[i]) continue;  /* nothing accumulated; avoid 0/0 */
            for(k = 0; k < l.w*l.h; ++k){
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    int z;
                    float sum = 0;
                    for(z = 0; z < ids; ++z){
                        int index = b*l.outputs + (l.classes + z)*l.w*l.h + k;
                        sum += pow(l.sums[i][z]/l.counts[i] - l.output[index], 2);
                    }
                    mse[i] += sum;
                }
            }
            mse[i] /= l.counts[i];
        }

        // Calculate average embedding
        for(i = 0; i < 90; ++i){
            if(!l.counts[i]) continue;
            scal_cpu(ids, 1.f/l.counts[i], l.sums[i], 1);
            if(b == 0 && net.gpu_index == 0){
                printf("%4d, %6.3f, ", l.counts[i], mse[i]);
                for(j = 0; j < ids/4; ++j){
                    printf("%6.3f,", l.sums[i][j]);
                }
                printf("\n");
            }
        }
        free(mse);

        // Calculate embedding loss
        for(i = 0; i < 90; ++i){
            if(!l.counts[i]) continue;
            for(k = 0; k < l.w*l.h; ++k){
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    for(j = 0; j < 90; ++j){
                        if(!l.counts[j])continue;
                        int z;
                        for(z = 0; z < ids; ++z){
                            int index = b*l.outputs + (l.classes + z)*l.w*l.h + k;
                            float diff = l.sums[j][z] - l.output[index];
                            /* Step towards the own-instance mean, away from all others. */
                            if (j == i) l.delta[index] += diff < 0? -.1 : .1;
                            else        l.delta[index] += -(diff < 0? -.1 : .1);
                        }
                    }
                }
            }
        }
    }

    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("took %lf sec\n", what_time_is_it_now() - start);
}
/*
 * Backward pass (CPU): hand this layer's delta to the network delta buffer
 * through axpy_cpu with a scale of 1 over batch*inputs values.
 */
void backward_iseg_layer(const layer l, network net)
{
    const int count = l.batch*l.inputs;

    axpy_cpu(count, 1, l.delta, 1, net.delta, 1);
}
#ifdef GPU
/*
 * Forward pass (GPU build).  The device input is copied to the device output
 * and the class planes of every image get the LOGISTIC activation; the
 * embedding planes are left as they are.  The result is then pulled into the
 * host-side net.input, the CPU forward pass computes the deltas, and those
 * are pushed back to the device.
 */
void forward_iseg_layer_gpu(const layer l, network net)
{
    const int total = l.batch*l.inputs;
    const int class_vals = l.classes*l.w*l.h;
    int b;

    copy_gpu(total, net.input_gpu, 1, l.output_gpu, 1);

    for (b = 0; b < l.batch; ++b){
        float *image_out = l.output_gpu + b*l.outputs;
        activate_array_gpu(image_out, class_vals, LOGISTIC);
    }

    cuda_pull_array(l.output_gpu, net.input, total);
    forward_iseg_layer(l, net);
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
}
/*
 * Backward pass (GPU build): hand this layer's device delta to the network's
 * device delta buffer through axpy_gpu with a scale of 1 over batch*inputs
 * values.  No activation gradient is applied here.
 */
void backward_iseg_layer_gpu(const layer l, network net)
{
    axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif

19
src/iseg_layer.h Normal file
View File

@ -0,0 +1,19 @@
/* Public interface of the instance-segmentation (ISEG) layer. */
#ifndef ISEG_LAYER_H
#define ISEG_LAYER_H
#include "darknet.h"
#include "layer.h"
#include "network.h"
/* Create an ISEG layer for a w x h map with `classes` class channels and `ids` embedding channels. */
layer make_iseg_layer(int batch, int w, int h, int classes, int ids);
/* CPU forward pass: copies net.input to the layer output and fills the layer delta and cost from net.truth. */
void forward_iseg_layer(const layer l, network net);
/* CPU backward pass: passes the layer delta on to net.delta. */
void backward_iseg_layer(const layer l, network net);
/* Change the spatial size of the layer and reallocate its output/delta buffers. */
void resize_iseg_layer(layer *l, int w, int h);
/* NOTE(review): no definition of this function appears in iseg_layer.c as added by this change -- confirm it exists elsewhere or drop the declaration. */
int iseg_num_detections(layer l, float thresh);
#ifdef GPU
/* GPU-build counterparts of the forward and backward passes. */
void forward_iseg_layer_gpu(const layer l, network net);
void backward_iseg_layer_gpu(layer l, network net);
#endif
#endif

View File

@ -27,8 +27,8 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
l.w = w; l.w = w;
l.c = c; l.c = c;
l.pad = padding; l.pad = padding;
l.out_w = (w + 2*padding - size)/stride + 1; l.out_w = (w + padding - size)/stride + 1;
l.out_h = (h + 2*padding - size)/stride + 1; l.out_h = (h + padding - size)/stride + 1;
l.out_c = c; l.out_c = c;
l.outputs = l.out_h * l.out_w * l.out_c; l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = h*w*c; l.inputs = h*w*c;
@ -57,8 +57,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
l->w = w; l->w = w;
l->inputs = h*w*l->c; l->inputs = h*w*l->c;
l->out_w = (w + 2*l->pad - l->size)/l->stride + 1; l->out_w = (w + l->pad - l->size)/l->stride + 1;
l->out_h = (h + 2*l->pad - l->size)/l->stride + 1; l->out_h = (h + l->pad - l->size)/l->stride + 1;
l->outputs = l->out_w * l->out_h * l->c; l->outputs = l->out_w * l->out_h * l->c;
int output_size = l->outputs * l->batch; int output_size = l->outputs * l->batch;
@ -79,8 +79,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
void forward_maxpool_layer(const maxpool_layer l, network net) void forward_maxpool_layer(const maxpool_layer l, network net)
{ {
int b,i,j,k,m,n; int b,i,j,k,m,n;
int w_offset = -l.pad; int w_offset = -l.pad/l.stride;
int h_offset = -l.pad; int h_offset = -l.pad/l.stride;
int h = l.out_h; int h = l.out_h;
int w = l.out_w; int w = l.out_w;

View File

@ -9,8 +9,8 @@ extern "C" {
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
{ {
int h = (in_h + 2*pad - size)/stride + 1; int h = (in_h + pad - size)/stride + 1;
int w = (in_w + 2*pad - size)/stride + 1; int w = (in_w + pad - size)/stride + 1;
int c = in_c; int c = in_c;
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@ -24,8 +24,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
id /= c; id /= c;
int b = id; int b = id;
int w_offset = -pad; int w_offset = -pad/2;
int h_offset = -pad; int h_offset = -pad/2;
int out_index = j + w*(i + h*(k + c*b)); int out_index = j + w*(i + h*(k + c*b));
float max = -INFINITY; float max = -INFINITY;
@ -49,8 +49,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
{ {
int h = (in_h + 2*pad - size)/stride + 1; int h = (in_h + pad - size)/stride + 1;
int w = (in_w + 2*pad - size)/stride + 1; int w = (in_w + pad - size)/stride + 1;
int c = in_c; int c = in_c;
int area = (size-1)/stride; int area = (size-1)/stride;
@ -66,8 +66,8 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
id /= in_c; id /= in_c;
int b = id; int b = id;
int w_offset = -pad; int w_offset = -pad/2;
int h_offset = -pad; int h_offset = -pad/2;
float d = 0; float d = 0;
int l, m; int l, m;

View File

@ -27,6 +27,7 @@
#include "parser.h" #include "parser.h"
#include "region_layer.h" #include "region_layer.h"
#include "yolo_layer.h" #include "yolo_layer.h"
#include "iseg_layer.h"
#include "reorg_layer.h" #include "reorg_layer.h"
#include "rnn_layer.h" #include "rnn_layer.h"
#include "route_layer.h" #include "route_layer.h"
@ -52,6 +53,7 @@ LAYER_TYPE string_to_layer_type(char * type)
if (strcmp(type, "[detection]")==0) return DETECTION; if (strcmp(type, "[detection]")==0) return DETECTION;
if (strcmp(type, "[region]")==0) return REGION; if (strcmp(type, "[region]")==0) return REGION;
if (strcmp(type, "[yolo]")==0) return YOLO; if (strcmp(type, "[yolo]")==0) return YOLO;
if (strcmp(type, "[iseg]")==0) return ISEG;
if (strcmp(type, "[local]")==0) return LOCAL; if (strcmp(type, "[local]")==0) return LOCAL;
if (strcmp(type, "[conv]")==0 if (strcmp(type, "[conv]")==0
|| strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
@ -265,18 +267,19 @@ layer parse_connected(list *options, size_params params)
return l; return l;
} }
softmax_layer parse_softmax(list *options, size_params params) layer parse_softmax(list *options, size_params params)
{ {
int groups = option_find_int_quiet(options, "groups",1); int groups = option_find_int_quiet(options, "groups",1);
softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups); layer l = make_softmax_layer(params.batch, params.inputs, groups);
layer.temperature = option_find_float_quiet(options, "temperature", 1); l.temperature = option_find_float_quiet(options, "temperature", 1);
char *tree_file = option_find_str(options, "tree", 0); char *tree_file = option_find_str(options, "tree", 0);
if (tree_file) layer.softmax_tree = read_tree(tree_file); if (tree_file) l.softmax_tree = read_tree(tree_file);
layer.w = params.w; l.w = params.w;
layer.h = params.h; l.h = params.h;
layer.c = params.c; l.c = params.c;
layer.spatial = option_find_float_quiet(options, "spatial", 0); l.spatial = option_find_float_quiet(options, "spatial", 0);
return layer; l.noloss = option_find_int_quiet(options, "noloss", 0);
return l;
} }
int *parse_yolo_mask(char *a, int *num) int *parse_yolo_mask(char *a, int *num)
@ -338,6 +341,15 @@ layer parse_yolo(list *options, size_params params)
return l; return l;
} }
/*
 * Build an ISEG layer from a cfg section.
 *
 * Options read: "classes" (default 20) and "ids" (default 32).  The layer is
 * sized from the running params, and its output size is asserted to equal
 * the incoming input size.
 */
layer parse_iseg(list *options, size_params params)
{
    const int n_classes = option_find_int(options, "classes", 20);
    const int n_ids = option_find_int(options, "ids", 32);

    layer iseg = make_iseg_layer(params.batch, params.w, params.h, n_classes, n_ids);
    assert(iseg.outputs == params.inputs);
    return iseg;
}
layer parse_region(list *options, size_params params) layer parse_region(list *options, size_params params)
{ {
int coords = option_find_int(options, "coords", 4); int coords = option_find_int(options, "coords", 4);
@ -472,7 +484,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
{ {
int stride = option_find_int(options, "stride",1); int stride = option_find_int(options, "stride",1);
int size = option_find_int(options, "size",stride); int size = option_find_int(options, "size",stride);
int padding = option_find_int_quiet(options, "padding", (size-1)/2); int padding = option_find_int_quiet(options, "padding", size-1);
int batch,h,w,c; int batch,h,w,c;
h = params.h; h = params.h;
@ -791,6 +803,8 @@ network *parse_network_cfg(char *filename)
l = parse_region(options, params); l = parse_region(options, params);
}else if(lt == YOLO){ }else if(lt == YOLO){
l = parse_yolo(options, params); l = parse_yolo(options, params);
}else if(lt == ISEG){
l = parse_iseg(options, params);
}else if(lt == DETECTION){ }else if(lt == DETECTION){
l = parse_detection(options, params); l = parse_detection(options, params);
}else if(lt == SOFTMAX){ }else if(lt == SOFTMAX){
@ -829,6 +843,7 @@ network *parse_network_cfg(char *filename)
l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
l.dontsave = option_find_int_quiet(options, "dontsave", 0); l.dontsave = option_find_int_quiet(options, "dontsave", 0);
l.dontload = option_find_int_quiet(options, "dontload", 0); l.dontload = option_find_int_quiet(options, "dontload", 0);
l.numload = option_find_int_quiet(options, "numload", 0);
l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
l.smooth = option_find_float_quiet(options, "smooth", 0); l.smooth = option_find_float_quiet(options, "smooth", 0);
@ -1152,7 +1167,8 @@ void load_convolutional_weights(layer l, FILE *fp)
//load_convolutional_weights_binary(l, fp); //load_convolutional_weights_binary(l, fp);
//return; //return;
} }
int num = l.nweights; if(l.numload) l.n = l.numload;
int num = l.c/l.groups*l.n*l.size*l.size;
fread(l.biases, sizeof(float), l.n, fp); fread(l.biases, sizeof(float), l.n, fp);
if (l.batch_normalize && (!l.dontloadscales)){ if (l.batch_normalize && (!l.dontloadscales)){
fread(l.scales, sizeof(float), l.n, fp); fread(l.scales, sizeof(float), l.n, fp);

View File

@ -50,7 +50,7 @@ void forward_softmax_layer(const softmax_layer l, network net)
softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output);
} }
if(net.truth){ if(net.truth && !l.noloss){
softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss);
l.cost[0] = sum_array(l.loss, l.batch*l.inputs); l.cost[0] = sum_array(l.loss, l.batch*l.inputs);
} }
@ -88,7 +88,7 @@ void forward_softmax_layer_gpu(const softmax_layer l, network net)
softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
} }
} }
if(net.truth){ if(net.truth && !l.noloss){
softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu);
if(l.softmax_tree){ if(l.softmax_tree){
mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0);