From 88b9ecb41438cc4cd086720b168b8dba4b02cb8e Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Sat, 27 May 2017 21:41:55 -0700 Subject: [PATCH] In chess, the pawns go first --- Makefile | 2 +- src/classifier.c | 2 +- src/darknet.c | 3 + src/data.c | 128 +++++++++++++++++++++- src/data.h | 3 +- src/image.c | 42 ++++++- src/image.h | 15 ++- src/layer.h | 1 + src/parser.c | 4 + src/region_layer.c | 38 ++++--- src/segmenter.c | 262 ++++++++++++++++++++++++++++++++++++++++++++ src/softmax_layer.c | 6 +- 12 files changed, 476 insertions(+), 30 deletions(-) create mode 100644 src/segmenter.c diff --git a/Makefile b/Makefile index 4f12f70b..3bbaa5c0 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ CFLAGS+= -DCUDNN LDFLAGS+= -lcudnn endif -OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o regressor.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o lsd.o super.o voxel.o tree.o +OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o segmenter.o regressor.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o 
crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o lsd.o super.o voxel.o tree.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o diff --git a/src/classifier.c b/src/classifier.c index 039ad668..4f6a1a0b 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -700,7 +700,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi float *predictions = network_predict(net, X); if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1, 1); top_k(predictions, net.outputs, top, indexes); - printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); for(i = 0; i < top; ++i){ int index = indexes[i]; //if(net.hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net.hierarchy->parent[index] >= 0) ? 
names[net.hierarchy->parent[index]] : "Root"); diff --git a/src/darknet.c b/src/darknet.c index 108343df..b69cd436 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -21,6 +21,7 @@ extern void run_dice(int argc, char **argv); extern void run_compare(int argc, char **argv); extern void run_classifier(int argc, char **argv); extern void run_regressor(int argc, char **argv); +extern void run_segmenter(int argc, char **argv); extern void run_char_rnn(int argc, char **argv); extern void run_vid_rnn(int argc, char **argv); extern void run_tag(int argc, char **argv); @@ -442,6 +443,8 @@ int main(int argc, char **argv) run_classifier(argc, argv); } else if (0 == strcmp(argv[1], "regressor")){ run_regressor(argc, argv); + } else if (0 == strcmp(argv[1], "segmenter")){ + run_segmenter(argc, argv); } else if (0 == strcmp(argv[1], "art")){ run_art(argc, argv); } else if (0 == strcmp(argv[1], "tag")){ diff --git a/src/data.c b/src/data.c index e78b17ed..dcc5089b 100644 --- a/src/data.c +++ b/src/data.c @@ -116,7 +116,7 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, if(center){ crop = center_crop_image(im, size, size); } else { - crop = random_augment_image(im, angle, aspect, min, max, size); + crop = random_augment_image(im, angle, aspect, min, max, size, size); } int flip = rand()%2; if (flip) flip_image(crop); @@ -511,6 +511,119 @@ void free_data(data d) } } +void load_rle(image im, int *rle, int n) +{ + int count = 0; + int curr = 0; + int i,j; + for(i = 0; i < n; ++i){ + for(j = 0; j < rle[i] && count < im.h*im.w*im.c; ++j){ /* stop at the end of the image buffer if the run list is too long */ + im.data[count++] = curr; + } + curr = 1 - curr; + } + for(; count < im.h*im.w*im.c; ++count){ + im.data[count] = curr; + } +} + +void or_image(image src, image dest, int c) +{ + int i; + for(i = 0; i < src.w*src.h; ++i){ + if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1; + } +} + +void fill_bg_mask(image m) +{ + int i,k; + int index = m.w*m.h*(m.c-1); + for(i = 0; i < m.w*m.h; ++i){ + m.data[index + i] = 1; + } + for(k = 0; k < m.c-1; 
++k){ + for(i = 0; i < m.w*m.h; ++i){ + if(m.data[index + i] && m.data[k*m.w*m.h + i]) m.data[index + i] = 0; + } + } +} + +image get_segmentation_image(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes+1); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %32787s", &id, buff) == 2){ /* field width keeps the token inside buff[32788] */ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + if(id >= 0 && id < classes) or_image(part, mask, id); /* ignore ids that have no class channel in the mask */ + free(rle); + } + fill_bg_mask(mask); + fclose(file); + free_image(part); + return mask; +} + +data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + d.y.rows = n; + d.y.cols = h*w*(classes+1); + d.y.vals = calloc(d.y.rows, sizeof(float*)); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes); + //image mask = make_image(orig.w, orig.h, classes+1); + image sized_m = rotate_crop_image(mask, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + 
if(flip) flip_image(sized_m); + d.y.vals[i] = sized_m.data; + + free_image(orig); + free_image(mask); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure) { char **random_paths = get_random_paths(paths, n, m); @@ -698,7 +811,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in image orig = load_image_color(random_paths[i], 0, 0); image sized = make_image(w, h, orig.c); fill_image(sized, .5); - + float dw = jitter * orig.w; float dh = jitter * orig.h; @@ -706,7 +819,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in float scale = rand_uniform(.25, 2); float nw, nh; - + if(new_ar < 1){ nh = scale * h; nw = nh * new_ar; @@ -752,6 +865,8 @@ void *load_thread(void *ptr) *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); } else if (a.type == WRITING_DATA){ *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); + } else if (a.type == SEGMENTATION_DATA){ + *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); } else if (a.type == REGION_DATA){ *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); } else if (a.type == DETECTION_DATA){ @@ -811,6 +926,13 @@ void *load_threads(void *ptr) return 0; } +void load_data_blocking(load_args args) +{ + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + load_thread(ptr); +} + pthread_t load_data(load_args args) { pthread_t thread; diff --git a/src/data.h b/src/data.h index 16b334dc..c366d66e 100644 --- a/src/data.h +++ b/src/data.h @@ -28,7 +28,7 @@ typedef struct{ } data; typedef enum { - 
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA + CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA } data_type; typedef struct load_args{ @@ -72,6 +72,7 @@ typedef struct{ void free_data(data d); pthread_t load_data(load_args args); +void load_data_blocking(load_args args); pthread_t load_data_in_thread(load_args args); diff --git a/src/image.c b/src/image.c index e1d19442..1a8c984a 100644 --- a/src/image.c +++ b/src/image.c @@ -25,6 +25,25 @@ float get_color(int c, int x, int max) return r; } +image mask_to_rgb(image mask) +{ + int n = mask.c; + image im = make_image(mask.w, mask.h, 3); + int i, j; + for(j = 0; j < n; ++j){ + int offset = j*123457 % n; + float red = get_color(2,offset,n); + float green = get_color(1,offset,n); + float blue = get_color(0,offset,n); + for(i = 0; i < im.w*im.h; ++i){ /* accumulate so every class contributes; assumes make_image returns zeroed data (TODO confirm) */ + im.data[i + 0*im.w*im.h] += mask.data[j*im.h*im.w + i]*red; + im.data[i + 1*im.w*im.h] += mask.data[j*im.h*im.w + i]*green; + im.data[i + 2*im.w*im.h] += mask.data[j*im.h*im.w + i]*blue; + } + } + return im; +} + void composite_image(image source, image dest, int dx, int dy) { int x,y,k; @@ -902,8 +921,9 @@ image random_crop_image(image im, int w, int h) return crop; } -image random_augment_image(image im, float angle, float aspect, int low, int high, int size) +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h) { + augment_args a = {0}; aspect = rand_scale(aspect); int r = rand_int(low, high); int min = (im.h < im.w*aspect) ? 
im.h : im.w*aspect; @@ -911,15 +931,27 @@ image random_augment_image(image im, float angle, float aspect, int low, int hig float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; - float dx = (im.w*scale/aspect - size) / 2.; - float dy = (im.h*scale - size) / 2.; + float dx = (im.w*scale/aspect - w) / 2.; + float dy = (im.h*scale - h) / 2.; /* vertical slack is measured against the crop height, not its width */ if(dx < 0) dx = 0; if(dy < 0) dy = 0; dx = rand_uniform(-dx, dx); dy = rand_uniform(-dy, dy); - image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy, aspect); + a.rad = rad; + a.scale = scale; + a.w = w; + a.h = h; + a.dx = dx; + a.dy = dy; + a.aspect = aspect; + return a; +} +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = random_augment_args(im, angle, aspect, low, high, w, h); + image crop = rotate_crop_image(im, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); return crop; } @@ -1300,7 +1332,7 @@ void test_resize(char *filename) show_image(c4, "C4"); #ifdef OPENCV while(1){ - image aug = random_augment_image(im, 0, .75, 320, 448, 320); + image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); show_image(aug, "aug"); free_image(aug); diff --git a/src/image.h b/src/image.h index abe99d6c..7201515c 100644 --- a/src/image.h +++ b/src/image.h @@ -19,6 +19,16 @@ #endif #endif +typedef struct { + int w; + int h; + float scale; + float rad; + float dx; + float dy; + float aspect; +} augment_args; + typedef struct { int h; int w; @@ -37,6 +47,7 @@ void show_image_cv(image p, const char *name, IplImage *disp); #endif #endif +image mask_to_rgb(image mask); float get_color(int c, int x, int max); void flip_image(image a); void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); @@ -48,9 +59,11 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs, image image_distance(image a, image b); void scale_image(image m, float s); image crop_image(image im, int dx, int dy, int w, int h); +image 
rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); image center_crop_image(image im, int w, int h); image random_crop_image(image im, int w, int h); -image random_augment_image(image im, float angle, float aspect, int low, int high, int size); +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); void random_distort_image(image im, float hue, float saturation, float exposure); image letterbox_image(image im, int w, int h); void letterbox_image_into(image im, int w, int h, image boxed); diff --git a/src/layer.h b/src/layer.h index 2470d84c..620d3674 100644 --- a/src/layer.h +++ b/src/layer.h @@ -73,6 +73,7 @@ struct layer{ int stride; int reverse; int flatten; + int spatial; int pad; int sqrt; int flip; diff --git a/src/parser.c b/src/parser.c index a1cf5f7d..47f2629a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -258,6 +258,10 @@ softmax_layer parse_softmax(list *options, size_params params) layer.temperature = option_find_float_quiet(options, "temperature", 1); char *tree_file = option_find_str(options, "tree", 0); if (tree_file) layer.softmax_tree = read_tree(tree_file); + layer.w = params.w; + layer.h = params.h; + layer.c = params.c; + layer.spatial = option_find_float_quiet(options, "spatial", 0); return layer; } diff --git a/src/region_layer.c b/src/region_layer.c index a9a31208..5fe931c6 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -29,7 +29,7 @@ layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) l.bias_updates = calloc(n*2, sizeof(float)); l.outputs = h*w*n*(classes + coords + 1); l.inputs = l.outputs; - l.truths = 30*(5); + l.truths = 30*(l.coords + 1); l.delta = calloc(batch*l.outputs, sizeof(float)); l.output = calloc(batch*l.outputs, sizeof(float)); int i; @@ -153,7 +153,7 @@ void forward_region_layer(const layer 
l, network net) int index = entry_index(l, b, n*l.w*l.h, 0); activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); index = entry_index(l, b, n*l.w*l.h, 4); - activate_array(l.output + index, l.w*l.h, LOGISTIC); + if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC); } } if (l.softmax_tree){ @@ -165,8 +165,8 @@ void forward_region_layer(const layer l, network net) count += group_size; } } else if (l.softmax){ - int index = entry_index(l, 0, 0, 5); - softmax_cpu(net.input + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index); + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index); } #endif @@ -184,9 +184,9 @@ void forward_region_layer(const layer l, network net) if(l.softmax_tree){ int onlyclass = 0; for(t = 0; t < 30; ++t){ - box truth = float_to_box(net.truth + t*5 + b*l.truths, 1); + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); if(!truth.x) break; - int class = net.truth[t*5 + b*l.truths + 4]; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; /* class id follows the l.coords box values, matching the lookup later in this function */ float maxp = 0; int maxi = 0; if(truth.x > 100000 && truth.y > 100000){ @@ -230,6 +230,7 @@ void forward_region_layer(const layer l, network net) int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); avg_anyobj += l.output[obj_index]; l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]); if (best_iou > l.thresh) { l.delta[obj_index] = 0; } @@ -282,16 +283,19 @@ void forward_region_layer(const layer l, network net) avg_iou += iou; //l.delta[best_index + 4] = iou - l.output[best_index + 4]; - int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4); + int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords); avg_obj += l.output[obj_index]; l.delta[obj_index] = l.object_scale * (1 
- l.output[obj_index]); if (l.rescore) { l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); } + if(l.background){ + l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]); + } - int class = net.truth[t*5 + b*l.truths + 4]; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; if (l.map) class = l.map[class]; - int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 5); + int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1); delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat); ++count; ++class_count; @@ -380,10 +384,10 @@ void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, f } int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); - float scale = predictions[obj_index]; + float scale = l.background ? 1 : predictions[obj_index]; boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); - int class_index = entry_index(l, 0, n*l.w*l.h + i, 5); + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); if(l.softmax_tree){ hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h); @@ -436,8 +440,8 @@ void forward_region_layer_gpu(const layer l, network net) for(n = 0; n < l.n; ++n){ int index = entry_index(l, b, n*l.w*l.h, 0); activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); - index = entry_index(l, b, n*l.w*l.h, 4); - activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC); } } if (l.softmax_tree){ @@ -450,9 +454,9 @@ void forward_region_layer_gpu(const layer l, network net) count += group_size; } } else if (l.softmax) { - int index = entry_index(l, 0, 0, 5); + int index = entry_index(l, 0, 0, l.coords + 
!l.background); //printf("%d\n", index); - softmax_gpu(net.input_gpu + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index); + softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index); } if(!net.train || l.onlyforward){ cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); @@ -479,8 +483,8 @@ void backward_region_layer_gpu(const layer l, network net) for(n = 0; n < l.n; ++n){ int index = entry_index(l, b, n*l.w*l.h, 0); gradient_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); - index = entry_index(l, b, n*l.w*l.h, 4); - gradient_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) gradient_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); } } axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); diff --git a/src/segmenter.c b/src/segmenter.c new file mode 100644 index 00000000..fab66cb0 --- /dev/null +++ b/src/segmenter.c @@ -0,0 +1,262 @@ +#include "network.h" +#include "utils.h" +#include "parser.h" +#include "option_list.h" +#include "blas.h" +#include "assert.h" +#include "cuda.h" +#include <sys/time.h> + +void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network *nets = calloc(ngpus, sizeof(network)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&nets[i], weightfile); + } + if(clear) *nets[i].seen = 0; + nets[i].learning_rate *= ngpus; + } + srand(time(0)); + network net = nets[0]; + + int imgs = net.batch * net.subdivisions * ngpus; + + printf("Learning Rate: %g, 
Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.threads = 32; + + args.min = net.min_crop; + args.max = net.max_crop; + args.angle = net.angle; + args.aspect = net.aspect; + args.exposure = net.exposure; + args.saturation = net.saturation; + args.hue = net.hue; + args.size = net.w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = SEGMENTATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + free_data(train); + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + char buff[256]; + snprintf(buff, sizeof(buff), "%s/%s_%d.weights",backup_directory,base, epoch); /* bounded: the directory and base names come from config */ + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + snprintf(buff, sizeof(buff), "%s/%s.backup",backup_directory,base); + 
save_weights(net, buff); + } + } + char buff[256]; + snprintf(buff, sizeof(buff), "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_segmenter(char *datafile, char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + snprintf(input, 256, "%s", filename); /* always NUL-terminated, unlike strncpy on a 256+ char path */ + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net.w, net.h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image m = float_to_image(sized.w, sized.h, 80, predictions); + image rgb = mask_to_rgb(m); + show_image(sized, "orig"); + show_image(rgb, "pred"); + cvWaitKey(0); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + free_image(im); + free_image(sized); + free_image(rgb); + if (filename) break; + } +} + + +void demo_segmenter(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Regressor Demo\n"); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + + srand(2222222); + CvCapture * cap; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow("Regressor", CV_WINDOW_NORMAL); + cvResizeWindow("Regressor", 512, 512); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + 
image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net.w, net.h); + show_image(in, "Regressor"); + + float *predictions = network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + printf("People: %f\n", predictions[0]); + + free_image(in_s); + free_image(in); + + cvWaitKey(10); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_segmenter(int argc, char **argv) +{ + if(argc < 5){ /* argv[3] (data) and argv[4] (cfg) are both read below */ + fprintf(stderr, "usage: %s %s [train/test/demo] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + if(i < ngpus-1) gpu_list = strchr(gpu_list, ',')+1; /* no comma follows the last entry */ + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? 
argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/src/softmax_layer.c b/src/softmax_layer.c index 1eb6e972..86b0237c 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -70,7 +70,11 @@ void forward_softmax_layer_gpu(const softmax_layer l, network net) count += group_size; } } else { - softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); + if(l.spatial){ + softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); + }else{ + softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); + } } }