mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
adding new tiny-yolo
This commit is contained in:
parent
b8eb8b0a40
commit
6b38dcdce0
@ -1,27 +1,24 @@
|
|||||||
[net]
|
[net]
|
||||||
batch=64
|
batch=64
|
||||||
subdivisions=64
|
subdivisions=2
|
||||||
height=448
|
height=448
|
||||||
width=448
|
width=448
|
||||||
channels=3
|
channels=3
|
||||||
momentum=0.9
|
momentum=0.9
|
||||||
decay=0.0005
|
decay=0.0005
|
||||||
|
|
||||||
learning_rate=0.0001
|
saturation=.75
|
||||||
|
exposure=.75
|
||||||
|
hue = .1
|
||||||
|
|
||||||
|
learning_rate=0.0005
|
||||||
policy=steps
|
policy=steps
|
||||||
steps=20,40,60,80,20000,30000
|
steps=200,400,600,800,20000,30000
|
||||||
scales=5,5,2,2,.1,.1
|
scales=2.5,2,2,2,.1,.1
|
||||||
max_batches = 40000
|
max_batches = 40000
|
||||||
|
|
||||||
[crop]
|
|
||||||
crop_width=448
|
|
||||||
crop_height=448
|
|
||||||
flip=0
|
|
||||||
angle=0
|
|
||||||
saturation = 1.5
|
|
||||||
exposure = 1.5
|
|
||||||
|
|
||||||
[convolutional]
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
filters=16
|
filters=16
|
||||||
size=3
|
size=3
|
||||||
stride=1
|
stride=1
|
||||||
@ -33,6 +30,7 @@ size=2
|
|||||||
stride=2
|
stride=2
|
||||||
|
|
||||||
[convolutional]
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
filters=32
|
filters=32
|
||||||
size=3
|
size=3
|
||||||
stride=1
|
stride=1
|
||||||
@ -44,6 +42,7 @@ size=2
|
|||||||
stride=2
|
stride=2
|
||||||
|
|
||||||
[convolutional]
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
filters=64
|
filters=64
|
||||||
size=3
|
size=3
|
||||||
stride=1
|
stride=1
|
||||||
@ -55,6 +54,7 @@ size=2
|
|||||||
stride=2
|
stride=2
|
||||||
|
|
||||||
[convolutional]
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
filters=128
|
filters=128
|
||||||
size=3
|
size=3
|
||||||
stride=1
|
stride=1
|
||||||
@ -66,6 +66,7 @@ size=2
|
|||||||
stride=2
|
stride=2
|
||||||
|
|
||||||
[convolutional]
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
filters=256
|
filters=256
|
||||||
size=3
|
size=3
|
||||||
stride=1
|
stride=1
|
||||||
@ -77,6 +78,7 @@ size=2
|
|||||||
stride=2
|
stride=2
|
||||||
|
|
||||||
[convolutional]
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
filters=512
|
filters=512
|
||||||
size=3
|
size=3
|
||||||
stride=1
|
stride=1
|
||||||
@ -88,37 +90,21 @@ size=2
|
|||||||
stride=2
|
stride=2
|
||||||
|
|
||||||
[convolutional]
|
[convolutional]
|
||||||
filters=1024
|
batch_normalize=1
|
||||||
size=3
|
size=3
|
||||||
stride=1
|
stride=1
|
||||||
pad=1
|
pad=1
|
||||||
|
filters=1024
|
||||||
activation=leaky
|
activation=leaky
|
||||||
|
|
||||||
[convolutional]
|
[convolutional]
|
||||||
filters=1024
|
batch_normalize=1
|
||||||
size=3
|
size=3
|
||||||
stride=1
|
stride=1
|
||||||
pad=1
|
pad=1
|
||||||
|
filters=256
|
||||||
activation=leaky
|
activation=leaky
|
||||||
|
|
||||||
[convolutional]
|
|
||||||
filters=1024
|
|
||||||
size=3
|
|
||||||
stride=1
|
|
||||||
pad=1
|
|
||||||
activation=leaky
|
|
||||||
|
|
||||||
[connected]
|
|
||||||
output=256
|
|
||||||
activation=linear
|
|
||||||
|
|
||||||
[connected]
|
|
||||||
output=4096
|
|
||||||
activation=leaky
|
|
||||||
|
|
||||||
[dropout]
|
|
||||||
probability=.5
|
|
||||||
|
|
||||||
[connected]
|
[connected]
|
||||||
output= 1470
|
output= 1470
|
||||||
activation=linear
|
activation=linear
|
@ -31,7 +31,7 @@ __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
|
|||||||
__device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;}
|
__device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;}
|
||||||
__device__ float relu_activate_kernel(float x){return x*(x>0);}
|
__device__ float relu_activate_kernel(float x){return x*(x>0);}
|
||||||
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
|
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
|
||||||
__device__ float relie_activate_kernel(float x){return x*(x>0);}
|
__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01*x;}
|
||||||
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
|
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
|
||||||
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
|
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
|
||||||
__device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);}
|
__device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);}
|
||||||
|
@ -36,7 +36,7 @@ static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
|
|||||||
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
|
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
|
||||||
static inline float relu_activate(float x){return x*(x>0);}
|
static inline float relu_activate(float x){return x*(x>0);}
|
||||||
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
|
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
|
||||||
static inline float relie_activate(float x){return x*(x>0);}
|
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
|
||||||
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
|
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
|
||||||
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
|
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
|
||||||
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
|
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
|
||||||
|
@ -95,6 +95,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||||||
args.min = net.min_crop;
|
args.min = net.min_crop;
|
||||||
args.max = net.max_crop;
|
args.max = net.max_crop;
|
||||||
args.angle = net.angle;
|
args.angle = net.angle;
|
||||||
|
args.aspect = net.aspect;
|
||||||
args.exposure = net.exposure;
|
args.exposure = net.exposure;
|
||||||
args.saturation = net.saturation;
|
args.saturation = net.saturation;
|
||||||
args.hue = net.hue;
|
args.hue = net.hue;
|
||||||
|
@ -187,7 +187,7 @@ void denormalize_connected_layer(layer l)
|
|||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
for(i = 0; i < l.outputs; ++i){
|
for(i = 0; i < l.outputs; ++i){
|
||||||
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
|
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001);
|
||||||
for(j = 0; j < l.inputs; ++j){
|
for(j = 0; j < l.inputs; ++j){
|
||||||
l.weights[i*l.inputs + j] *= scale;
|
l.weights[i*l.inputs + j] *= scale;
|
||||||
}
|
}
|
||||||
@ -198,6 +198,23 @@ void denormalize_connected_layer(layer l)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Print summary statistics for the parameters of a connected layer.
 *
 * Each parameter group is written as a label followed by the line produced
 * by print_statistics(). When l.batch_normalize is set, the scales, rolling
 * mean and rolling variance are reported first; biases and weights are
 * always reported.
 *
 * l: the layer to summarize (passed by value).
 */
void statistics_connected_layer(layer l)
{
    if(l.batch_normalize){
        printf("Scales ");
        print_statistics(l.scales, l.outputs);
        printf("Rolling Mean ");
        print_statistics(l.rolling_mean, l.outputs);
        printf("Rolling Variance ");
        print_statistics(l.rolling_variance, l.outputs);
    }
    printf("Biases ");
    print_statistics(l.biases, l.outputs);
    printf("Weights ");
    /* denormalize_connected_layer() walks the weights as
     * weights[i*l.inputs + j] for i < l.outputs and j < l.inputs, so the
     * array has inputs*outputs entries. Summarize all of them instead of
     * only the first l.outputs values. */
    print_statistics(l.weights, l.inputs*l.outputs);
}
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
|
||||||
void pull_connected_layer(connected_layer l)
|
void pull_connected_layer(connected_layer l)
|
||||||
|
@ -13,6 +13,7 @@ void forward_connected_layer(connected_layer layer, network_state state);
|
|||||||
void backward_connected_layer(connected_layer layer, network_state state);
|
void backward_connected_layer(connected_layer layer, network_state state);
|
||||||
void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay);
|
void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay);
|
||||||
void denormalize_connected_layer(layer l);
|
void denormalize_connected_layer(layer l);
|
||||||
|
void statistics_connected_layer(layer l);
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
void forward_connected_layer_gpu(connected_layer layer, network_state state);
|
void forward_connected_layer_gpu(connected_layer layer, network_state state);
|
||||||
|
@ -254,6 +254,39 @@ void normalize_net(char *cfgfile, char *weightfile, char *outfile)
|
|||||||
save_weights(net, outfile);
|
save_weights(net, outfile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void statistics_net(char *cfgfile, char *weightfile)
|
||||||
|
{
|
||||||
|
gpu_index = -1;
|
||||||
|
network net = parse_network_cfg(cfgfile);
|
||||||
|
if (weightfile) {
|
||||||
|
load_weights(&net, weightfile);
|
||||||
|
}
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < net.n; ++i) {
|
||||||
|
layer l = net.layers[i];
|
||||||
|
if (l.type == CONNECTED && l.batch_normalize) {
|
||||||
|
printf("Connected Layer %d\n", i);
|
||||||
|
statistics_connected_layer(l);
|
||||||
|
}
|
||||||
|
if (l.type == GRU && l.batch_normalize) {
|
||||||
|
printf("GRU Layer %d\n", i);
|
||||||
|
printf("Input Z\n");
|
||||||
|
statistics_connected_layer(*l.input_z_layer);
|
||||||
|
printf("Input R\n");
|
||||||
|
statistics_connected_layer(*l.input_r_layer);
|
||||||
|
printf("Input H\n");
|
||||||
|
statistics_connected_layer(*l.input_h_layer);
|
||||||
|
printf("State Z\n");
|
||||||
|
statistics_connected_layer(*l.state_z_layer);
|
||||||
|
printf("State R\n");
|
||||||
|
statistics_connected_layer(*l.state_r_layer);
|
||||||
|
printf("State H\n");
|
||||||
|
statistics_connected_layer(*l.state_h_layer);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
|
void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
|
||||||
{
|
{
|
||||||
gpu_index = -1;
|
gpu_index = -1;
|
||||||
@ -374,6 +407,8 @@ int main(int argc, char **argv)
|
|||||||
reset_normalize_net(argv[2], argv[3], argv[4]);
|
reset_normalize_net(argv[2], argv[3], argv[4]);
|
||||||
} else if (0 == strcmp(argv[1], "denormalize")){
|
} else if (0 == strcmp(argv[1], "denormalize")){
|
||||||
denormalize_net(argv[2], argv[3], argv[4]);
|
denormalize_net(argv[2], argv[3], argv[4]);
|
||||||
|
} else if (0 == strcmp(argv[1], "statistics")){
|
||||||
|
statistics_net(argv[2], argv[3]);
|
||||||
} else if (0 == strcmp(argv[1], "normalize")){
|
} else if (0 == strcmp(argv[1], "normalize")){
|
||||||
normalize_net(argv[2], argv[3], argv[4]);
|
normalize_net(argv[2], argv[3], argv[4]);
|
||||||
} else if (0 == strcmp(argv[1], "rescale")){
|
} else if (0 == strcmp(argv[1], "rescale")){
|
||||||
|
23
src/data.c
23
src/data.c
@ -100,7 +100,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
|
|||||||
return X;
|
return X;
|
||||||
}
|
}
|
||||||
|
|
||||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
matrix X;
|
matrix X;
|
||||||
@ -110,7 +110,7 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,
|
|||||||
|
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
image im = load_image_color(paths[i], 0, 0);
|
image im = load_image_color(paths[i], 0, 0);
|
||||||
image crop = random_augment_image(im, angle, min, max, size);
|
image crop = random_augment_image(im, angle, aspect, min, max, size);
|
||||||
int flip = rand_r(&data_seed)%2;
|
int flip = rand_r(&data_seed)%2;
|
||||||
if (flip) flip_image(crop);
|
if (flip) flip_image(crop);
|
||||||
random_distort_image(crop, hue, saturation, exposure);
|
random_distort_image(crop, hue, saturation, exposure);
|
||||||
@ -676,15 +676,16 @@ void *load_thread(void *ptr)
|
|||||||
load_args a = *(struct load_args*)ptr;
|
load_args a = *(struct load_args*)ptr;
|
||||||
if(a.exposure == 0) a.exposure = 1;
|
if(a.exposure == 0) a.exposure = 1;
|
||||||
if(a.saturation == 0) a.saturation = 1;
|
if(a.saturation == 0) a.saturation = 1;
|
||||||
|
if(a.aspect == 0) a.aspect = 1;
|
||||||
|
|
||||||
if (a.type == OLD_CLASSIFICATION_DATA){
|
if (a.type == OLD_CLASSIFICATION_DATA){
|
||||||
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||||
} else if (a.type == CLASSIFICATION_DATA){
|
} else if (a.type == CLASSIFICATION_DATA){
|
||||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||||
} else if (a.type == SUPER_DATA){
|
} else if (a.type == SUPER_DATA){
|
||||||
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
|
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
|
||||||
} else if (a.type == STUDY_DATA){
|
} else if (a.type == STUDY_DATA){
|
||||||
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||||
} else if (a.type == WRITING_DATA){
|
} else if (a.type == WRITING_DATA){
|
||||||
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
|
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
|
||||||
} else if (a.type == REGION_DATA){
|
} else if (a.type == REGION_DATA){
|
||||||
@ -699,7 +700,7 @@ void *load_thread(void *ptr)
|
|||||||
*(a.im) = load_image_color(a.path, 0, 0);
|
*(a.im) = load_image_color(a.path, 0, 0);
|
||||||
*(a.resized) = resize_image(*(a.im), a.w, a.h);
|
*(a.resized) = resize_image(*(a.im), a.w, a.h);
|
||||||
} else if (a.type == TAG_DATA){
|
} else if (a.type == TAG_DATA){
|
||||||
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||||
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||||
}
|
}
|
||||||
free(ptr);
|
free(ptr);
|
||||||
@ -741,13 +742,13 @@ data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
|
|||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
|
||||||
{
|
{
|
||||||
data d = {0};
|
data d = {0};
|
||||||
d.indexes = calloc(n, sizeof(int));
|
d.indexes = calloc(n, sizeof(int));
|
||||||
if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
|
if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
|
||||||
d.shallow = 0;
|
d.shallow = 0;
|
||||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
|
||||||
d.y = load_labels_paths(paths, n, labels, k);
|
d.y = load_labels_paths(paths, n, labels, k);
|
||||||
if(m) free(paths);
|
if(m) free(paths);
|
||||||
return d;
|
return d;
|
||||||
@ -783,25 +784,25 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
|||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
|
||||||
{
|
{
|
||||||
if(m) paths = get_random_paths(paths, n, m);
|
if(m) paths = get_random_paths(paths, n, m);
|
||||||
data d = {0};
|
data d = {0};
|
||||||
d.shallow = 0;
|
d.shallow = 0;
|
||||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
|
||||||
d.y = load_labels_paths(paths, n, labels, k);
|
d.y = load_labels_paths(paths, n, labels, k);
|
||||||
if(m) free(paths);
|
if(m) free(paths);
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
|
||||||
{
|
{
|
||||||
if(m) paths = get_random_paths(paths, n, m);
|
if(m) paths = get_random_paths(paths, n, m);
|
||||||
data d = {0};
|
data d = {0};
|
||||||
d.w = size;
|
d.w = size;
|
||||||
d.h = size;
|
d.h = size;
|
||||||
d.shallow = 0;
|
d.shallow = 0;
|
||||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
|
||||||
d.y = load_tags_paths(paths, n, k);
|
d.y = load_tags_paths(paths, n, k);
|
||||||
if(m) free(paths);
|
if(m) free(paths);
|
||||||
return d;
|
return d;
|
||||||
|
@ -52,6 +52,7 @@ typedef struct load_args{
|
|||||||
int scale;
|
int scale;
|
||||||
float jitter;
|
float jitter;
|
||||||
float angle;
|
float angle;
|
||||||
|
float aspect;
|
||||||
float saturation;
|
float saturation;
|
||||||
float exposure;
|
float exposure;
|
||||||
float hue;
|
float hue;
|
||||||
@ -76,11 +77,11 @@ data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
|
|||||||
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
|
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
|
||||||
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
|
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
|
||||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
|
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
|
||||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
||||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||||
data load_go(char *filename);
|
data load_go(char *filename);
|
||||||
|
|
||||||
box_label *read_boxes(char *filename, int *n);
|
box_label *read_boxes(char *filename, int *n);
|
||||||
|
@ -117,6 +117,10 @@ static void convert_detections(float *predictions, int classes, int num, int squ
|
|||||||
int box_index = index * (classes + 5);
|
int box_index = index * (classes + 5);
|
||||||
boxes[index].x = (predictions[box_index + 0] + col + .5) / side * w;
|
boxes[index].x = (predictions[box_index + 0] + col + .5) / side * w;
|
||||||
boxes[index].y = (predictions[box_index + 1] + row + .5) / side * h;
|
boxes[index].y = (predictions[box_index + 1] + row + .5) / side * h;
|
||||||
|
if(1){
|
||||||
|
boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / side * w;
|
||||||
|
boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / side * h;
|
||||||
|
}
|
||||||
boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (square?2:1)) * w;
|
boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (square?2:1)) * w;
|
||||||
boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (square?2:1)) * h;
|
boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (square?2:1)) * h;
|
||||||
for(j = 0; j < classes; ++j){
|
for(j = 0; j < classes; ++j){
|
||||||
@ -237,6 +241,9 @@ void validate_detector(char *cfgfile, char *weightfile)
|
|||||||
free_image(val_resized[t]);
|
free_image(val_resized[t]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for(j = 0; j < classes; ++j){
|
||||||
|
fclose(fps[j]);
|
||||||
|
}
|
||||||
fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
|
fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
29
src/image.c
29
src/image.c
@ -479,7 +479,8 @@ image float_to_image(int w, int h, int c, float *data)
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
|
|
||||||
|
image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect)
|
||||||
{
|
{
|
||||||
int x, y, c;
|
int x, y, c;
|
||||||
float cx = im.w/2.;
|
float cx = im.w/2.;
|
||||||
@ -488,8 +489,8 @@ image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int
|
|||||||
for(c = 0; c < im.c; ++c){
|
for(c = 0; c < im.c; ++c){
|
||||||
for(y = 0; y < h; ++y){
|
for(y = 0; y < h; ++y){
|
||||||
for(x = 0; x < w; ++x){
|
for(x = 0; x < w; ++x){
|
||||||
float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
|
float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx;
|
||||||
float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
|
float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy;
|
||||||
float val = bilinear_interpolate(im, rx, ry, c);
|
float val = bilinear_interpolate(im, rx, ry, c);
|
||||||
set_pixel(rot, x, y, c, val);
|
set_pixel(rot, x, y, c, val);
|
||||||
}
|
}
|
||||||
@ -642,18 +643,23 @@ image random_crop_image(image im, int w, int h)
|
|||||||
return crop;
|
return crop;
|
||||||
}
|
}
|
||||||
|
|
||||||
image random_augment_image(image im, float angle, int low, int high, int size)
|
image random_augment_image(image im, float angle, float aspect, int low, int high, int size)
|
||||||
{
|
{
|
||||||
|
aspect = rand_scale(aspect);
|
||||||
int r = rand_int(low, high);
|
int r = rand_int(low, high);
|
||||||
int min = (im.h < im.w) ? im.h : im.w;
|
int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
|
||||||
float scale = (float)r / min;
|
float scale = (float)r / min;
|
||||||
|
|
||||||
float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
|
float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
|
||||||
int dx = rand_int(0, scale * im.w - size);
|
|
||||||
int dy = rand_int(0, scale * im.h - size);
|
|
||||||
//printf("%d %d\n", dx, dy);
|
|
||||||
|
|
||||||
image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy);
|
float dx = (im.w*scale/aspect - size) / 2.;
|
||||||
|
float dy = (im.h*scale - size) / 2.;
|
||||||
|
if(dx < 0) dx = 0;
|
||||||
|
if(dy < 0) dy = 0;
|
||||||
|
dx = rand_uniform(-dx, dx);
|
||||||
|
dy = rand_uniform(-dy, dy);
|
||||||
|
|
||||||
|
image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy, aspect);
|
||||||
|
|
||||||
return crop;
|
return crop;
|
||||||
}
|
}
|
||||||
@ -971,6 +977,11 @@ void test_resize(char *filename)
|
|||||||
show_image(c4, "C4");
|
show_image(c4, "C4");
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
while(1){
|
while(1){
|
||||||
|
image aug = random_augment_image(im, 0, 320, 448, 320, .75);
|
||||||
|
show_image(aug, "aug");
|
||||||
|
free_image(aug);
|
||||||
|
|
||||||
|
|
||||||
float exposure = 1.15;
|
float exposure = 1.15;
|
||||||
float saturation = 1.15;
|
float saturation = 1.15;
|
||||||
float hue = .05;
|
float hue = .05;
|
||||||
|
@ -31,7 +31,7 @@ image image_distance(image a, image b);
|
|||||||
void scale_image(image m, float s);
|
void scale_image(image m, float s);
|
||||||
image crop_image(image im, int dx, int dy, int w, int h);
|
image crop_image(image im, int dx, int dy, int w, int h);
|
||||||
image random_crop_image(image im, int w, int h);
|
image random_crop_image(image im, int w, int h);
|
||||||
image random_augment_image(image im, float angle, int low, int high, int size);
|
image random_augment_image(image im, float angle, float aspect, int low, int high, int size);
|
||||||
void random_distort_image(image im, float hue, float saturation, float exposure);
|
void random_distort_image(image im, float hue, float saturation, float exposure);
|
||||||
image resize_image(image im, int w, int h);
|
image resize_image(image im, int w, int h);
|
||||||
image resize_min(image im, int min);
|
image resize_min(image im, int min);
|
||||||
|
@ -41,6 +41,7 @@ typedef struct network{
|
|||||||
int max_crop;
|
int max_crop;
|
||||||
int min_crop;
|
int min_crop;
|
||||||
float angle;
|
float angle;
|
||||||
|
float aspect;
|
||||||
float exposure;
|
float exposure;
|
||||||
float saturation;
|
float saturation;
|
||||||
float hue;
|
float hue;
|
||||||
|
@ -497,6 +497,7 @@ void parse_net_options(list *options, network *net)
|
|||||||
net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
|
net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
|
||||||
|
|
||||||
net->angle = option_find_float_quiet(options, "angle", 0);
|
net->angle = option_find_float_quiet(options, "angle", 0);
|
||||||
|
net->aspect = option_find_float_quiet(options, "aspect", 1);
|
||||||
net->saturation = option_find_float_quiet(options, "saturation", 1);
|
net->saturation = option_find_float_quiet(options, "saturation", 1);
|
||||||
net->exposure = option_find_float_quiet(options, "exposure", 1);
|
net->exposure = option_find_float_quiet(options, "exposure", 1);
|
||||||
net->hue = option_find_float_quiet(options, "hue", 0);
|
net->hue = option_find_float_quiet(options, "hue", 0);
|
||||||
|
@ -80,8 +80,8 @@ box get_region_box(float *x, int index, int i, int j, int w, int h, int adjust,
|
|||||||
b.w = logistic_activate(x[index + 2]);
|
b.w = logistic_activate(x[index + 2]);
|
||||||
b.h = logistic_activate(x[index + 3]);
|
b.h = logistic_activate(x[index + 3]);
|
||||||
}
|
}
|
||||||
//if(adjust && b.w < .01) b.w = .01;
|
if(adjust && b.w < .01) b.w = .01;
|
||||||
//if(adjust && b.h < .01) b.h = .01;
|
if(adjust && b.h < .01) b.h = .01;
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -149,7 +149,6 @@ void forward_region_layer(const region_layer l, network_state state)
|
|||||||
l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
|
l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
|
||||||
if(best_iou > .5) l.delta[index + 4] = 0;
|
if(best_iou > .5) l.delta[index + 4] = 0;
|
||||||
|
|
||||||
/*
|
|
||||||
if(*(state.net.seen) < 6400){
|
if(*(state.net.seen) < 6400){
|
||||||
box truth = {0};
|
box truth = {0};
|
||||||
truth.x = (i + .5)/l.w;
|
truth.x = (i + .5)/l.w;
|
||||||
@ -158,7 +157,6 @@ void forward_region_layer(const region_layer l, network_state state)
|
|||||||
truth.h = .5;
|
truth.h = .5;
|
||||||
delta_region_box(truth, l.output, index, i, j, l.w, l.h, l.delta, LOG, 1);
|
delta_region_box(truth, l.output, index, i, j, l.w, l.h, l.delta, LOG, 1);
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -414,6 +414,13 @@ void mean_arrays(float **a, int n, int els, float *avg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Print a one-line summary of an array: its MSE, mean and variance.
 *
 * a, n: forwarded unchanged to mean_array(), variance_array() and
 *       mse_array(), which compute the reported values.
 */
void print_statistics(float *a, int n)
{
    float avg, var;

    avg = mean_array(a, n);
    var = variance_array(a, n);
    printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n",
           mse_array(a, n), avg, var);
}
|
||||||
|
|
||||||
float variance_array(float *a, int n)
|
float variance_array(float *a, int n)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -57,6 +57,7 @@ float find_float_arg(int argc, char **argv, char *arg, float def);
|
|||||||
int find_arg(int argc, char* argv[], char *arg);
|
int find_arg(int argc, char* argv[], char *arg);
|
||||||
char *find_char_arg(int argc, char **argv, char *arg, char *def);
|
char *find_char_arg(int argc, char **argv, char *arg, char *def);
|
||||||
int sample_array(float *a, int n);
|
int sample_array(float *a, int n);
|
||||||
|
void print_statistics(float *a, int n);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user