what do you even write here?

This commit is contained in:
Joseph Redmon 2016-08-11 11:54:24 -07:00
parent 845ab75796
commit aebe937710
12 changed files with 144 additions and 58 deletions

View File

@ -1,19 +1,29 @@
[net]
batch=1
subdivisions=1
batch=64
subdivisions=2
height=448
width=448
channels=3
momentum=0.9
decay=0.0005
learning_rate=0.001
learning_rate=0.0005
policy=steps
steps=200,400,600,20000,30000
scales=2.5,2,2,.1,.1
max_batches = 40000
[crop]
crop_width=448
crop_height=448
flip=0
angle=0
saturation = 1.5
exposure = 1.5
noadjust=1
[convolutional]
batch_normalize=1
filters=64
size=7
stride=2
@ -25,6 +35,7 @@ size=2
stride=2
[convolutional]
batch_normalize=1
filters=192
size=3
stride=1
@ -36,6 +47,7 @@ size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
@ -43,6 +55,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
@ -50,6 +63,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
@ -57,6 +71,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
@ -68,6 +83,7 @@ size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
@ -75,6 +91,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
@ -82,6 +99,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
@ -89,6 +107,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
@ -96,6 +115,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
@ -103,6 +123,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
@ -110,6 +131,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
@ -117,6 +139,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
@ -124,6 +147,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
@ -131,6 +155,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
@ -142,6 +167,7 @@ size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
@ -149,6 +175,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
@ -156,6 +183,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
@ -163,6 +191,7 @@ pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
@ -172,6 +201,7 @@ activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
@ -179,6 +209,7 @@ filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
@ -186,6 +217,7 @@ filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
@ -193,18 +225,25 @@ filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[connected]
output=4096
[local]
size=3
stride=1
pad=1
filters=256
activation=leaky
[dropout]
probability=.5
[connected]
output= 1470
output= 1715
activation=linear
[detection]
@ -212,7 +251,7 @@ classes=20
coords=4
rescore=1
side=7
num=2
num=3
softmax=0
sqrt=1
jitter=.2

View File

@ -41,7 +41,7 @@ list *read_data_cfg(char *filename)
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
{
int nthreads = 2;
int nthreads = 8;
int i;
data_seed = time(0);
@ -82,6 +82,9 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
args.min = net.min_crop;
args.max = net.max_crop;
args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.size = net.w;
args.paths = paths;
@ -113,14 +116,14 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock();
/*
int u;
for(u = 0; u < net.batch; ++u){
image im = float_to_image(net.w, net.h, 3, train.X.vals[u]);
show_image(im, "loaded");
cvWaitKey(0);
}
*/
if(0){
int u;
for(u = 0; u < imgs; ++u){
image im = float_to_image(net.w, net.h, 3, train.X.vals[u]);
show_image(im, "loaded");
cvWaitKey(0);
}
}
float loss = train_network(net, train);
if(avg_loss == -1) avg_loss = loss;

View File

@ -265,7 +265,7 @@ void forward_connected_layer_gpu(connected_layer l, network_state state)
void backward_connected_layer_gpu(connected_layer l, network_state state)
{
int i;
constrain_ongpu(l.outputs*l.batch, 5, l.delta_gpu, 1);
constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
for(i = 0; i < l.batch; ++i){
axpy_ongpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1);

View File

@ -100,7 +100,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
return X;
}
matrix load_image_cropped_paths(char **paths, int n, int min, int max, int size)
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float exposure, float saturation)
{
int i;
matrix X;
@ -110,9 +110,14 @@ matrix load_image_cropped_paths(char **paths, int n, int min, int max, int size)
for(i = 0; i < n; ++i){
image im = load_image_color(paths[i], 0, 0);
image crop = random_resize_crop_image(im, min, max, size);
image crop = random_augment_image(im, angle, min, max, size);
int flip = rand_r(&data_seed)%2;
if (flip) flip_image(crop);
float exp = rand_uniform(1./exposure, exposure);
float sat = rand_uniform(1./saturation, saturation);
exposure_image(crop, exp);
exposure_image(crop, sat);
/*
show_image(im, "orig");
show_image(crop, "crop");
@ -668,14 +673,17 @@ void *load_thread(void *ptr)
//printf("Loading data: %d\n", rand_r(&data_seed));
load_args a = *(struct load_args*)ptr;
if(a.exposure == 0) a.exposure = 1;
if(a.saturation == 0) a.saturation = 1;
if (a.type == OLD_CLASSIFICATION_DATA){
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == CLASSIFICATION_DATA){
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
} else if (a.type == SUPER_DATA){
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == STUDY_DATA){
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
} else if (a.type == WRITING_DATA){
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
} else if (a.type == REGION_DATA){
@ -690,7 +698,7 @@ void *load_thread(void *ptr)
*(a.im) = load_image_color(a.path, 0, 0);
*(a.resized) = resize_image(*(a.im), a.w, a.h);
} else if (a.type == TAG_DATA){
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size);
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
}
free(ptr);
@ -732,13 +740,13 @@ data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
return d;
}
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size)
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
{
data d = {0};
d.indexes = calloc(n, sizeof(int));
if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
d.shallow = 0;
d.X = load_image_cropped_paths(paths, n, min, max, size);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
d.y = load_labels_paths(paths, n, labels, k);
if(m) free(paths);
return d;
@ -774,25 +782,25 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
return d;
}
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size)
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.shallow = 0;
d.X = load_image_cropped_paths(paths, n, min, max, size);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
d.y = load_labels_paths(paths, n, labels, k);
if(m) free(paths);
return d;
}
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size)
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.w = size;
d.h = size;
d.shallow = 0;
d.X = load_image_cropped_paths(paths, n, min, max, size);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
d.y = load_tags_paths(paths, n, k);
if(m) free(paths);
return d;

View File

@ -51,6 +51,9 @@ typedef struct load_args{
int background;
int scale;
float jitter;
float angle;
float saturation;
float exposure;
data *d;
image *im;
image *resized;
@ -72,10 +75,10 @@ data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_go(char *filename);
box_label *read_boxes(char *filename, int *n);

View File

@ -132,7 +132,7 @@ void train_go(char *cfgfile, char *weightfile)
char buff[256];
float *board = calloc(19*19*net.batch, sizeof(float));
float *move = calloc(19*19*net.batch, sizeof(float));
moves m = load_go_moves("/home/pjreddie/go.train");
moves m = load_go_moves("/home/pjreddie/backup/go.train");
//moves m = load_go_moves("games.txt");
int N = m.n;

View File

@ -459,6 +459,25 @@ void show_image_cv(image p, const char *name)
return out;
}
/*
 * Build a w x h image (same channel count as im) by sampling im through a
 * combined offset, scale and rotation.
 *
 * For each output pixel (x, y) the source position is found by:
 *   1. shifting by (dx, dy) and dividing by the scale factor s,
 *   2. rotating that point by rad radians about the centre of im,
 * and the value is read with bilinear_interpolate().
 *
 * im   - source image (read only here)
 * rad  - rotation angle in radians
 * s    - scale factor; output coordinates are divided by it
 * w, h - size of the returned image
 * dx, dy - offset, in output (scaled) pixels, added before dividing by s
 *
 * Returns the image created by make_image(); presumably the caller is
 * expected to release it with free_image() -- confirm against callers.
 */
image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
{
    int x, y, c;
    float cx = im.w/2.;
    float cy = im.h/2.;
    /* The angle is fixed for the whole image, so evaluate the trig once
     * instead of four calls per pixel per channel. */
    double cos_r = cos(rad);
    double sin_r = sin(rad);
    image rot = make_image(w, h, im.c);
    for(c = 0; c < im.c; ++c){
        for(y = 0; y < h; ++y){
            /* Vertical offset from the rotation centre; constant along a row. */
            float sy = y/s + dy/s - cy;
            for(x = 0; x < w; ++x){
                float sx = x/s + dx/s - cx;
                float rx = cos_r*sx - sin_r*sy + cx;
                float ry = sin_r*sx + cos_r*sy + cy;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
    }
    return rot;
}
image rotate_image(image im, float rad)
{
int x, y, c;
@ -603,15 +622,19 @@ image random_crop_image(image im, int w, int h)
return crop;
}
image random_resize_crop_image(image im, int low, int high, int size)
image random_augment_image(image im, float angle, int low, int high, int size)
{
int r = rand_int(low, high);
image resized = resize_min(im, r);
int dx = rand_int(0, resized.w - size);
int dy = rand_int(0, resized.h - size);
image crop = crop_image(resized, dx, dy, size, size);
int min = (im.h < im.w) ? im.h : im.w;
float scale = (float)r / min;
float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
int dx = rand_int(0, scale * im.w - size);
int dy = rand_int(0, scale * im.h - size);
//printf("%d %d\n", dx, dy);
image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy);
if(resized.data != im.data) free_image(resized);
return crop;
}
@ -794,23 +817,6 @@ void saturate_exposure_image(image im, float sat, float exposure)
constrain_image(im);
}
/*
image saturate_image(image im, float sat)
{
image gray = grayscale_image(im);
image blend = blend_image(im, gray, sat);
free_image(gray);
constrain_image(blend);
return blend;
}
image brightness_image(image im, float b)
{
image bright = make_image(im.w, im.h, im.c);
return bright;
}
*/
float bilinear_interpolate(image im, float x, float y, int c)
{
int ix = (int) floorf(x);
@ -893,6 +899,7 @@ void test_resize(char *filename)
image bin = binarize_image(im);
/*
#ifdef GPU
image r = resize_image(im, im.w, im.h);
image black = make_image(im.w*2 + 3, im.h*2 + 3, 9);
@ -911,7 +918,16 @@ void test_resize(char *filename)
show_image_layers(black, "Black");
show_image(black2, "Recreate");
#endif
*/
image rot = rotate_crop_image(im, -.2618, 1, im.w/2, im.h/2, 0, 0);
image rot3 = rotate_crop_image(im, -.2618, 2, im.w, im.h, im.w/2, 0);
image rot2 = rotate_crop_image(im, -.2618, 1, im.w, im.h, 0, 0);
show_image(rot, "Rotated");
show_image(rot2, "base");
show_image(rot3, "Rotated2");
/*
show_image(im, "Original");
show_image(bin, "Binary");
show_image(gray, "Gray");
@ -919,6 +935,7 @@ void test_resize(char *filename)
show_image(sat5, "Saturation-.5");
show_image(exp2, "Exposure-2");
show_image(exp5, "Exposure-.5");
*/
#ifdef OPENCV
cvWaitKey(0);
#endif
@ -1036,7 +1053,11 @@ float get_pixel(image m, int x, int y, int c)
}
/*
 * Read channel c of image m at (x, y), accepting out-of-range coordinates.
 *
 * x and y are clamped to the nearest valid column/row, so positions outside
 * the image yield the value of the closest edge pixel. An invalid channel
 * index, or an image with no columns or rows, yields 0.
 */
float get_pixel_extend(image m, int x, int y, int c)
{
    if(c < 0 || c >= m.c) return 0;
    /* Nothing to clamp to in an empty image. */
    if(m.w <= 0 || m.h <= 0) return 0;
    /* No early return on out-of-range x/y: that would make the clamping
     * below unreachable. */
    if(x < 0) x = 0;
    if(x >= m.w) x = m.w-1;
    if(y < 0) y = 0;
    if(y >= m.h) y = m.h-1;
    return get_pixel(m, x, y, c);
}
void set_pixel(image m, int x, int y, int c, float val)

View File

@ -31,7 +31,7 @@ image image_distance(image a, image b);
void scale_image(image m, float s);
image crop_image(image im, int dx, int dy, int w, int h);
image random_crop_image(image im, int w, int h);
image random_resize_crop_image(image im, int low, int high, int size);
image random_augment_image(image im, float angle, int low, int high, int size);
image resize_image(image im, int w, int h);
image resize_min(image im, int min);
void translate_image(image m, float s);

View File

@ -40,6 +40,9 @@ typedef struct network{
int h, w, c;
int max_crop;
int min_crop;
float angle;
float exposure;
float saturation;
int gpu_index;

View File

@ -483,6 +483,10 @@ void parse_net_options(list *options, network *net)
net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
net->angle = option_find_float_quiet(options, "angle", 0);
net->saturation = option_find_float_quiet(options, "saturation", 1);
net->exposure = option_find_float_quiet(options, "exposure", 1);
if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
char *policy_s = option_find_str(options, "policy", "constant");

View File

@ -531,7 +531,6 @@ int rand_int(int min, int max)
}
// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
#define TWO_PI 6.2831853071795864769252866
float rand_normal()
{
static int haveSpare = 0;
@ -578,6 +577,11 @@ size_t rand_size_t()
/*
 * Draw a float from the closed range spanned by min and max using rand().
 * The bounds may be passed in either order; they are sorted before use.
 */
float rand_uniform(float min, float max)
{
    /* Pick the smaller bound as the base and the larger as the top. */
    float lo = (max < min) ? max : min;
    float hi = (max < min) ? min : max;
    return ((float)rand()/RAND_MAX * (hi - lo)) + lo;
}

View File

@ -5,6 +5,7 @@
#include "list.h"
#define SECRET_NUM -1234
#define TWO_PI 6.2831853071795864769252866
void shuffle(void *arr, size_t n, size_t size);
void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections);