writing stuff

This commit is contained in:
Joseph Redmon
2015-09-22 15:22:50 -07:00
parent fed6d6e31d
commit 59e356730f
7 changed files with 701 additions and 602 deletions

49
cfg/writing.cfg Normal file
View File

@ -0,0 +1,49 @@
[net]
batch=64
subdivisions=1
height=256
width=256
channels=3
learning_rate=0.00001
momentum=0.9
decay=0.0005
seen=0
[crop]
crop_height=256
crop_width=256
flip=0
angle=0
saturation=1
exposure=1
[convolutional]
filters=32
size=3
stride=1
pad=1
activation=ramp
[convolutional]
filters=32
size=3
stride=1
pad=1
activation=ramp
[convolutional]
filters=32
size=3
stride=1
pad=1
activation=ramp
[convolutional]
filters=1
size=5
stride=1
pad=1
activation=logistic
[cost]

View File

@ -54,7 +54,12 @@ matrix load_image_paths_gray(char **paths, int n, int w, int h)
X.cols = 0;
for(i = 0; i < n; ++i){
image im = load_image(paths[i], w, h, 1);
image im = load_image(paths[i], w, h, 3);
image gray = grayscale_image(im);
free_image(im);
im = gray;
X.vals[i] = im.data;
X.cols = im.h*im.w*im.c;
}
@ -571,14 +576,14 @@ pthread_t load_data_in_thread(load_args args)
return thread;
}
data load_data_writing(char **paths, int n, int m, int w, int h)
data load_data_writing(char **paths, int n, int m, int w, int h, int downsample)
{
if(m) paths = get_random_paths(paths, n, m);
char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png");
data d;
d.shallow = 0;
d.X = load_image_paths(paths, n, w, h);
d.y = load_image_paths_gray(replace_paths, n, w/8, h/8);
d.y = load_image_paths_gray(replace_paths, n, w/downsample, h/downsample);
if(m) free(paths);
int i;
for(i = 0; i < n; ++i) free(replace_paths[i]);

View File

@ -68,7 +68,7 @@ box_label *read_boxes(char *filename, int *n);
data load_cifar10_data(char *filename);
data load_all_cifar10();
data load_data_writing(char **paths, int n, int m, int w, int h);
data load_data_writing(char **paths, int n, int m, int w, int h, int downsample);
list *get_paths(char *filename);
char **get_labels(char *filename);

View File

@ -241,21 +241,21 @@ void show_image_cv(image p, char *name)
}
cvShowImage(buff, disp);
cvReleaseImage(&disp);
}
}
#endif
/*
 * Show an image.
 *
 * With OPENCV defined the image is handed to show_image_cv(); otherwise a
 * notice is printed to stderr and the image is written via save_image(),
 * which appends ".png" to `name`.
 */
void show_image(image p, char *name)
{
#ifdef OPENCV
    show_image_cv(p, name);
#else
    fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
    save_image(p, name);
#endif
}
void save_image(image im, char *name)
{
void save_image(image im, char *name)
{
char buff[256];
//sprintf(buff, "%s (%d)", name, windows);
sprintf(buff, "%s.png", name);
@ -269,11 +269,11 @@ void show_image_cv(image p, char *name)
int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
free(data);
if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
}
}
#ifdef OPENCV
void save_image_jpg(image p, char *name)
{
void save_image_jpg(image p, char *name)
{
image copy = copy_image(p);
rgbgr_image(copy);
int x,y,k;
@ -293,11 +293,11 @@ void show_image_cv(image p, char *name)
cvSaveImage(buff, disp,0);
cvReleaseImage(&disp);
free_image(copy);
}
#endif
}
#endif
void show_image_layers(image p, char *name)
{
void show_image_layers(image p, char *name)
{
int i;
char buff[256];
for(i = 0; i < p.c; ++i){
@ -306,41 +306,41 @@ void show_image_cv(image p, char *name)
show_image(layer, buff);
free_image(layer);
}
}
}
/*
 * Collapse the layers of `p` into a single image with
 * collapse_image_layers(p, 1), show it under `name`, then free the
 * temporary collapsed image. `p` itself is not freed.
 */
void show_image_collapsed(image p, char *name)
{
    image c = collapse_image_layers(p, 1);
    show_image(c, name);
    free_image(c);
}
image make_empty_image(int w, int h, int c)
{
image make_empty_image(int w, int h, int c)
{
image out;
out.data = 0;
out.h = h;
out.w = w;
out.c = c;
return out;
}
}
image make_image(int w, int h, int c)
{
image make_image(int w, int h, int c)
{
image out = make_empty_image(w,h,c);
out.data = calloc(h*w*c, sizeof(float));
return out;
}
}
image float_to_image(int w, int h, int c, float *data)
{
image float_to_image(int w, int h, int c, float *data)
{
image out = make_empty_image(w,h,c);
out.data = data;
return out;
}
}
image rotate_image(image im, float rad)
{
image rotate_image(image im, float rad)
{
int x, y, c;
float cx = im.w/2.;
float cy = im.h/2.;
@ -356,22 +356,22 @@ void show_image_cv(image p, char *name)
}
}
return rot;
}
}
/* Add the constant `s` to every value in the image buffer, in place. */
void translate_image(image m, float s)
{
    int i;
    for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
}
/* Multiply every value in the image buffer by `s`, in place. */
void scale_image(image m, float s)
{
    int i;
    for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
}
image crop_image(image im, int dx, int dy, int w, int h)
{
image crop_image(image im, int dx, int dy, int w, int h)
{
image cropped = make_image(w, h, im.c);
int i, j, k;
for(k = 0; k < im.c; ++k){
@ -388,21 +388,21 @@ void show_image_cv(image p, char *name)
}
}
return cropped;
}
}
/* Return the largest of the three values. */
float three_way_max(float a, float b, float c)
{
    return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ;
}
/* Return the smallest of the three values. */
float three_way_min(float a, float b, float c)
{
    return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ;
}
// http://www.cs.rit.edu/~ncs/color/t_convert.html
void rgb_to_hsv(image im)
{
// http://www.cs.rit.edu/~ncs/color/t_convert.html
void rgb_to_hsv(image im)
{
assert(im.c == 3);
int i, j;
float r, g, b;
@ -435,10 +435,10 @@ void show_image_cv(image p, char *name)
set_pixel(im, i, j, 2, v);
}
}
}
}
void hsv_to_rgb(image im)
{
void hsv_to_rgb(image im)
{
assert(im.c == 3);
int i, j;
float r, g, b;
@ -476,13 +476,13 @@ void show_image_cv(image p, char *name)
set_pixel(im, i, j, 2, b);
}
}
}
}
image grayscale_image(image im)
{
image grayscale_image(image im)
{
assert(im.c == 3);
int i, j, k;
image gray = make_image(im.w, im.h, im.c);
image gray = make_image(im.w, im.h, 1);
float scale[] = {0.587, 0.299, 0.114};
for(k = 0; k < im.c; ++k){
for(j = 0; j < im.h; ++j){
@ -491,13 +491,11 @@ void show_image_cv(image p, char *name)
}
}
}
memcpy(gray.data + im.w*im.h*1, gray.data, sizeof(float)*im.w*im.h);
memcpy(gray.data + im.w*im.h*2, gray.data, sizeof(float)*im.w*im.h);
return gray;
}
}
image blend_image(image fore, image back, float alpha)
{
image blend_image(image fore, image back, float alpha)
{
assert(fore.w == back.w && fore.h == back.h && fore.c == back.c);
image blend = make_image(fore.w, fore.h, fore.c);
int i, j, k;
@ -511,10 +509,10 @@ void show_image_cv(image p, char *name)
}
}
return blend;
}
}
void scale_image_channel(image im, int c, float v)
{
void scale_image_channel(image im, int c, float v)
{
int i, j;
for(j = 0; j < im.h; ++j){
for(i = 0; i < im.w; ++i){
@ -523,34 +521,34 @@ void show_image_cv(image p, char *name)
set_pixel(im, i, j, c, pix);
}
}
}
}
/*
 * Scale channel 1 of the HSV representation of `im` by `sat`, in place.
 * The image is converted RGB -> HSV, scaled, converted back, and then passed
 * through constrain_image(). rgb_to_hsv() asserts a 3-channel image.
 * NOTE(review): channel 1 is presumably saturation - confirm in rgb_to_hsv.
 */
void saturate_image(image im, float sat)
{
    rgb_to_hsv(im);
    scale_image_channel(im, 1, sat);
    hsv_to_rgb(im);
    constrain_image(im);
}
/*
 * Scale channel 2 of the HSV representation of `im` (the channel
 * rgb_to_hsv() stores `v` in) by `sat`, in place, then convert back to RGB
 * and pass the result through constrain_image().
 */
void exposure_image(image im, float sat)
{
    rgb_to_hsv(im);
    scale_image_channel(im, 2, sat);
    hsv_to_rgb(im);
    constrain_image(im);
}
/*
 * Apply both adjustments in a single RGB -> HSV -> RGB round trip:
 * HSV channel 1 is scaled by `sat` and channel 2 by `exposure`, then the
 * image is passed through constrain_image(). Operates in place.
 */
void saturate_exposure_image(image im, float sat, float exposure)
{
    rgb_to_hsv(im);
    scale_image_channel(im, 1, sat);
    scale_image_channel(im, 2, exposure);
    hsv_to_rgb(im);
    constrain_image(im);
}
/*
/*
image saturate_image(image im, float sat)
{
image gray = grayscale_image(im);
@ -567,8 +565,8 @@ void show_image_cv(image p, char *name)
}
*/
float bilinear_interpolate(image im, float x, float y, int c)
{
float bilinear_interpolate(image im, float x, float y, int c)
{
int ix = (int) floorf(x);
int iy = (int) floorf(y);
@ -580,10 +578,10 @@ void show_image_cv(image p, char *name)
(1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) +
dy * dx * get_pixel_extend(im, ix+1, iy+1, c);
return val;
}
}
image resize_image(image im, int w, int h)
{
image resize_image(image im, int w, int h)
{
image resized = make_image(w, h, im.c);
image part = make_image(w, im.h, im.c);
int r, c, k;
@ -624,10 +622,10 @@ void show_image_cv(image p, char *name)
free_image(part);
return resized;
}
}
void test_resize(char *filename)
{
void test_resize(char *filename)
{
image im = load_image(filename, 0,0, 3);
image gray = grayscale_image(im);
@ -652,11 +650,11 @@ void show_image_cv(image p, char *name)
#ifdef OPENCV
cvWaitKey(0);
#endif
}
}
#ifdef OPENCV
image ipl_to_image(IplImage* src)
{
image ipl_to_image(IplImage* src)
{
unsigned char *data = (unsigned char *)src->imageData;
int h = src->height;
int w = src->width;
@ -673,10 +671,10 @@ void show_image_cv(image p, char *name)
}
}
return out;
}
}
image load_image_cv(char *filename, int channels)
{
image load_image_cv(char *filename, int channels)
{
IplImage* src = 0;
int flag = -1;
if (channels == 0) flag = -1;
@ -695,13 +693,13 @@ void show_image_cv(image p, char *name)
cvReleaseImage(&src);
rgbgr_image(out);
return out;
}
}
#endif
image load_image_stb(char *filename, int channels)
{
image load_image_stb(char *filename, int channels)
{
int w, h, c;
unsigned char *data = stbi_load(filename, &w, &h, &c, channels);
if (!data) {
@ -722,10 +720,10 @@ void show_image_cv(image p, char *name)
}
free(data);
return im;
}
}
image load_image(char *filename, int w, int h, int c)
{
image load_image(char *filename, int w, int h, int c)
{
#ifdef OPENCV
image out = load_image_cv(filename, c);
#else
@ -738,46 +736,46 @@ void show_image_cv(image p, char *name)
out = resized;
}
return out;
}
}
/* Convenience wrapper: load `filename` via load_image() requesting 3 channels. */
image load_image_color(char *filename, int w, int h)
{
    return load_image(filename, w, h, 3);
}
/*
 * Copy channel `l` of `m` into a newly allocated single-channel image of the
 * same width and height. `l` is not range-checked. The result is allocated
 * with make_image(), so the caller releases it with free_image().
 */
image get_image_layer(image m, int l)
{
    image out = make_image(m.w, m.h, 1);
    int i;
    for(i = 0; i < m.h*m.w; ++i){
        out.data[i] = m.data[i+l*m.h*m.w];
    }
    return out;
}
/*
 * Pixel accessors. Values are stored channel-major: the element for
 * (x, y, c) lives at data[c*h*w + y*w + x]. The asserting accessors check
 * only the upper bounds of x, y and c.
 */

/* Read the value at (x, y) in channel c. */
float get_pixel(image m, int x, int y, int c)
{
    assert(x < m.w && y < m.h && c < m.c);
    return m.data[c*m.h*m.w + y*m.w + x];
}

/* Like get_pixel(), but any out-of-range coordinate or channel reads as 0. */
float get_pixel_extend(image m, int x, int y, int c)
{
    if(x < 0 || x >= m.w || y < 0 || y >= m.h || c < 0 || c >= m.c) return 0;
    return get_pixel(m, x, y, c);
}

/* Overwrite the value at (x, y) in channel c with val. */
void set_pixel(image m, int x, int y, int c, float val)
{
    assert(x < m.w && y < m.h && c < m.c);
    m.data[c*m.h*m.w + y*m.w + x] = val;
}

/* Add val to the value at (x, y) in channel c. */
void add_pixel(image m, int x, int y, int c, float val)
{
    assert(x < m.w && y < m.h && c < m.c);
    m.data[c*m.h*m.w + y*m.w + x] += val;
}
void print_image(image m)
{
void print_image(image m)
{
int i, j, k;
for(i =0 ; i < m.c; ++i){
for(j =0 ; j < m.h; ++j){
@ -791,10 +789,10 @@ void show_image_cv(image p, char *name)
printf("\n");
}
printf("\n");
}
}
image collapse_images_vert(image *ims, int n)
{
image collapse_images_vert(image *ims, int n)
{
int color = 1;
int border = 1;
int h,w,c;
@ -826,10 +824,10 @@ void show_image_cv(image p, char *name)
free_image(copy);
}
return filters;
}
}
image collapse_images_horz(image *ims, int n)
{
image collapse_images_horz(image *ims, int n)
{
int color = 1;
int border = 1;
int h,w,c;
@ -862,10 +860,10 @@ void show_image_cv(image p, char *name)
free_image(copy);
}
return filters;
}
}
void show_images(image *ims, int n, char *window)
{
void show_images(image *ims, int n, char *window)
{
image m = collapse_images_vert(ims, n);
/*
int w = 448;
@ -882,9 +880,9 @@ void show_image_cv(image p, char *name)
show_image(sized, window);
free_image(sized);
free_image(m);
}
}
/*
 * Release the pixel buffer of `m`. The struct is passed by value, so the
 * caller's copy still holds the stale pointer afterwards.
 */
void free_image(image m)
{
    free(m.data);
}

View File

@ -61,6 +61,7 @@ void forward_region_layer(const region_layer l, network_state state)
if(state.train){
float avg_iou = 0;
float avg_cat = 0;
float avg_allcat = 0;
float avg_obj = 0;
float avg_anyobj = 0;
int count = 0;
@ -90,6 +91,7 @@ void forward_region_layer(const region_layer l, network_state state)
l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]);
*(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2);
if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
avg_allcat += l.output[class_index+j];
}
box truth = float_to_box(state.truth + truth_index + 1 + l.classes);
@ -151,7 +153,7 @@ void forward_region_layer(const region_layer l, network_state state)
LOGISTIC, l.delta + index + locations*l.classes);
}
}
printf("Region Avg IOU: %f, Avg Cat Pred: %f, Avg Obj: %f, Avg Any: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
printf("Region Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
}
}

View File

@ -132,21 +132,22 @@ void train_swag(char *cfgfile, char *weightfile)
void convert_swag_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes)
{
int i,j,n;
int per_cell = 5*num+classes;
//int per_cell = 5*num+classes;
for (i = 0; i < side*side; ++i){
int row = i / side;
int col = i % side;
for(n = 0; n < num; ++n){
int offset = i*per_cell + 5*n;
float scale = predictions[offset];
int index = i*num + n;
boxes[index].x = (predictions[offset + 1] + col) / side * w;
boxes[index].y = (predictions[offset + 2] + row) / side * h;
boxes[index].w = pow(predictions[offset + 3], (square?2:1)) * w;
boxes[index].h = pow(predictions[offset + 4], (square?2:1)) * h;
int p_index = side*side*classes + i*num + n;
float scale = predictions[p_index];
int box_index = side*side*(classes + num) + (i*num + n)*4;
boxes[index].x = (predictions[box_index + 0] + col) / side * w;
boxes[index].y = (predictions[box_index + 1] + row) / side * h;
boxes[index].w = pow(predictions[box_index + 2], (square?2:1)) * w;
boxes[index].h = pow(predictions[box_index + 3], (square?2:1)) * h;
for(j = 0; j < classes; ++j){
offset = i*per_cell + 5*num;
float prob = scale*predictions[offset+j];
int class_index = i*classes;
float prob = scale*predictions[class_index+j];
probs[index][j] = (prob > thresh) ? prob : 0;
}
}

View File

@ -2,8 +2,13 @@
#include "utils.h"
#include "parser.h"
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#endif
void train_writing(char *cfgfile, char *weightfile)
{
char *backup_directory = "/home/pjreddie/backup/";
data_seed = time(0);
srand(time(0));
float avg_loss = -1;
@ -23,17 +28,17 @@ void train_writing(char *cfgfile, char *weightfile)
while(1){
++i;
time=clock();
data train = load_data_writing(paths, imgs, plist->size, 512, 512);
data train = load_data_writing(paths, imgs, plist->size, 256, 256, 1);
printf("Loaded %lf seconds\n",sec(clock()-time));
time=clock();
float loss = train_network(net, train);
#ifdef GPU
float *out = get_network_output_gpu(net);
#else
float *out = get_network_output(net);
#endif
/*
image pred = float_to_image(64, 64, 1, out);
print_image(pred);
*/
/*
/*
image im = float_to_image(256, 256, 3, train.X.vals[0]);
image lab = float_to_image(64, 64, 1, train.y.vals[0]);
image pred = float_to_image(64, 64, 1, out);
@ -48,16 +53,53 @@ void train_writing(char *cfgfile, char *weightfile)
avg_loss = avg_loss*.9 + loss*.1;
printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
free_data(train);
if((i % 20000) == 0) net.learning_rate *= .1;
//if(i%100 == 0 && net.learning_rate > .00001) net.learning_rate *= .97;
if(i%1000==0){
char buff[256];
sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
}
}
}
/*
 * Run the network described by `cfgfile` on one image and output the
 * prediction.
 *
 * cfgfile    - network configuration passed to parse_network_cfg().
 * weightfile - optional weights file; loaded only when non-null.
 * outfile    - when non-null the prediction is saved through save_image()
 *              (which appends ".png"); otherwise it is shown with
 *              show_image() and, under OPENCV, the call waits for a key.
 *
 * The image path is read as one line from stdin. The image is resized to
 * the network's input size (net.w x net.h) before prediction.
 */
void test_writing(char *cfgfile, char *weightfile, char *outfile)
{
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    srand(2222222);

    char filename[256];
    /* Without this check, EOF on stdin would leave `filename` uninitialized. */
    if(!fgets(filename, sizeof(filename), stdin)){
        fprintf(stderr, "No image path provided on stdin\n");
        return;
    }
    /* Strip the trailing newline; unlike strtok this also handles a bare "\n". */
    filename[strcspn(filename, "\n")] = '\0';
    if(filename[0] == '\0'){
        fprintf(stderr, "No image path provided on stdin\n");
        return;
    }

    image im = load_image_color(filename, 0, 0);
    image sized = resize_image(im, net.w, net.h);
    printf("%d %d %d\n", im.h, im.w, im.c);

    float *X = sized.data;
    clock_t start = clock();
    network_predict(net, X);
    printf("%s: Predicted in %f seconds.\n", filename, sec(clock()-start));

    /* NOTE(review): pred is not freed here; presumably it aliases network-owned
     * output memory - confirm against get_network_image(). */
    image pred = get_network_image(net);
    if (outfile) {
        printf("Save image as %s.png (shape: %d %d)\n", outfile, pred.w, pred.h);
        save_image(pred, outfile);
    } else {
        show_image(pred, "prediction");
#ifdef OPENCV
        cvWaitKey(0);
        cvDestroyAllWindows();
#endif
    }

    free_image(im);
    free_image(sized);
}
void run_writing(int argc, char **argv)
{
if(argc < 4){
@ -67,6 +109,8 @@ void run_writing(int argc, char **argv)
char *cfg = argv[3];
char *weights = (argc > 4) ? argv[4] : 0;
char *outfile = (argc > 5) ? argv[5] : 0;
if(0==strcmp(argv[2], "train")) train_writing(cfg, weights);
else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, outfile);
}