detection layer fixed

This commit is contained in:
Joseph Redmon 2015-03-08 11:25:28 -07:00
parent 2313a8eb54
commit 655f636a42
9 changed files with 141 additions and 36 deletions

View File

@ -7,7 +7,6 @@ __device__ float linear_activate_kernel(float x){return x;}
// Device-side element-wise activations: each maps one input value x to its
// activated value.
__device__ float sigmoid_activate_kernel(float x){return 1./(1. + exp(-x));}
__device__ float relu_activate_kernel(float x){return x*(x>0);}
// Slope 1.1 for x > 0 and 0.1 otherwise.
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
// Use the math-library tanhf: the (exp(2x)-1)/(exp(2x)+1) form evaluates to
// inf/inf = NaN as soon as exp(2*x) overflows, whereas tanhf saturates at +/-1.
__device__ float tanh_activate_kernel(float x){return tanhf(x);}

// Gradient of the linear activation: constant 1 regardless of x.
__device__ float linear_gradient_kernel(float x){return 1;}

View File

@ -35,19 +35,6 @@ ACTIVATION get_activation(char *s)
return RELU;
}
float linear_activate(float x){return x;}
float sigmoid_activate(float x){return 1./(1. + exp(-x));}
float relu_activate(float x){return x*(x>0);}
float ramp_activate(float x){return x*(x>0)+.1*x;}
float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
//float tanh_activate(float x){return x - (x*x*x)/3;}
float linear_gradient(float x){return 1;}
float sigmoid_gradient(float x){return (1-x)*x;}
float relu_gradient(float x){return (x>0);}
float ramp_gradient(float x){return (x>0)+.1;}
float tanh_gradient(float x){return 1-x*x;}
float activate(float x, ACTIVATION a)
{
switch(a){

View File

@ -18,5 +18,17 @@ void activate_array_ongpu(float *x, int n, ACTIVATION a);
void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta);
#endif
static inline float linear_activate(float x){return x;}
static inline float sigmoid_activate(float x){return 1./(1. + exp(-x));}
static inline float relu_activate(float x){return x*(x>0);}
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
static inline float linear_gradient(float x){return 1;}
static inline float sigmoid_gradient(float x){return (1-x)*x;}
static inline float relu_gradient(float x){return (x>0);}
static inline float ramp_gradient(float x){return (x>0)+.1;}
static inline float tanh_gradient(float x){return 1-x*x;}
#endif

View File

@ -42,6 +42,68 @@ void train_captcha(char *cfgfile, char *weightfile)
}
}
/*
 * Interactive loop: builds the network from cfgfile (optionally loading
 * weightfile), then repeatedly reads an image path from stdin, runs the
 * network on that image and displays the prediction as a 60x200x3 image.
 *
 * cfgfile    - network configuration to parse.
 * weightfile - weights to load, or 0 to keep the freshly parsed network.
 *
 * Returns when stdin reaches end-of-file or a read error occurs.
 */
void decode_captcha(char *cfgfile, char *weightfile)
{
    setbuf(stdout, NULL);
    srand(time(0));
    network net = parse_network_cfg(cfgfile);
    set_batch_network(&net, 1);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    char filename[256];
    while(1){
        printf("Enter filename: ");
        /* Without this check EOF would leave the buffer stale and spin forever. */
        if(!fgets(filename, sizeof(filename), stdin)) break;
        /* strcspn also handles a blank line, which strtok leaves untouched. */
        filename[strcspn(filename, "\n")] = '\0';
        if(filename[0] == '\0') continue;

        image im = load_image_color(filename, 60, 200);
        scale_image(im, 1./255.);
        float *X = im.data;
        float *predictions = network_predict(net, X);
        /* NOTE(review): out is not freed; presumably float_to_image wraps
         * predictions without copying -- confirm. */
        image out = float_to_image(60, 200, 3, predictions);
        show_image(out, "decoded");
        cvWaitKey(0);
        free_image(im);
    }
}
/*
 * Endless training loop over the images listed in /data/captcha/encode.list.
 * Each iteration loads a batch of `imgs` images, trains on it, prints the
 * loss and a running average, and saves a weights snapshot every 100
 * iterations.
 *
 * cfgfile    - network configuration to parse; its base name is used in the
 *              snapshot file names.
 * weightfile - weights to resume from, or 0 to start from the parsed network.
 *
 * Never returns.
 */
void encode_captcha(char *cfgfile, char *weightfile)
{
    float avg_loss = -1;
    srand(time(0));
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    int imgs = 1024;
    /* Resume the iteration counter from the number of images already seen. */
    int i = net.seen/imgs;
    list *plist = get_paths("/data/captcha/encode.list");
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    /* Named `start` rather than `time` so the library function is not shadowed. */
    clock_t start;
    while(1){
        ++i;
        start = clock();
        data train = load_data_captcha_encode(paths, imgs, plist->size, 60, 200);
        scale_data_rows(train, 1./255);
        printf("Loaded: %lf seconds\n", sec(clock()-start));

        start = clock();
        float loss = train_network(net, train);
        net.seen += imgs;
        /* -1 marks "no average yet": seed it with the first loss. */
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-start), net.seen);
        /* Only X is freed: load_data_captcha_encode makes y share X's storage. */
        free_matrix(train.X);
        if(i%100==0){
            char buff[256];
            /* Bounded write: a long cfg base name must not overrun buff. */
            snprintf(buff, sizeof(buff), "/home/pjreddie/imagenet_backup/%s_%d.weights", base, i);
            save_weights(net, buff);
        }
    }
}
void validate_captcha(char *cfgfile, char *weightfile)
{
@ -115,6 +177,8 @@ void run_captcha(int argc, char **argv)
char *weights = (argc > 4) ? argv[4] : 0;
if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights);
else if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights);
else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights);
else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights);
}

View File

@ -13,15 +13,6 @@ extern void run_imagenet(int argc, char **argv);
extern void run_detection(int argc, char **argv);
extern void run_captcha(int argc, char **argv);
/*
 * Parses the network described by cfgfile, optionally loads weightfile into
 * it, and writes the result to outfile via save_network.
 * weightfile may be 0, in which case the freshly parsed network is saved.
 */
void convert(char *cfgfile, char *outfile, char *weightfile)
{
    network net = parse_network_cfg(cfgfile);

    if(weightfile != 0) load_weights(&net, weightfile);

    save_network(net, outfile);
}
void del_arg(int argc, char **argv, int index)
{
int i;
@ -57,20 +48,48 @@ int find_int_arg(int argc, char **argv, char *arg, int def)
return def;
}
/*
 * Rewrites, in place, the float stored at offset 0 of `filename`:
 * new = old*scale + add. The rest of the file is left untouched.
 *
 * filename - file to patch; opened for binary read/update.
 * scale    - multiplicative factor applied to the stored value.
 * add      - offset added after scaling.
 *
 * On a short read or failed write a message is printed to stderr and the
 * stored value is not reported as changed.
 */
void change_rate(char *filename, float scale, float add)
{
    FILE *fp = fopen(filename, "r+b");
    if(!fp){
        file_error(filename);
        return;
    }

    float rate = 0;
    if(fread(&rate, sizeof(float), 1, fp) != 1){
        fprintf(stderr, "Couldn't read learning rate from %s\n", filename);
        fclose(fp);
        return;
    }

    float updated = rate*scale + add;
    printf("Scaling learning rate from %f to %f\n", rate, updated);

    /* Seek back to the start so the new value overwrites the one just read. */
    if(fseek(fp, 0, SEEK_SET) != 0 || fwrite(&updated, sizeof(float), 1, fp) != 1){
        fprintf(stderr, "Couldn't write learning rate to %s\n", filename);
    }
    fclose(fp);
}
/*
 * Parses the network in cfgfile, loads weights from weightfile via
 * load_weights_upto with `max` as the cutoff, and saves the resulting
 * weights to outfile. If weightfile is 0 nothing is loaded.
 */
void partial(char *cfgfile, char *weightfile, char *outfile, int max)
{
    network net = parse_network_cfg(cfgfile);

    if(weightfile != 0) load_weights_upto(&net, weightfile, max);

    save_weights(net, outfile);
}
/*
 * Parses the network described by cfgfile, optionally loads weightfile into
 * it, and writes the result to outfile via save_network.
 * weightfile may be 0, in which case the freshly parsed network is saved.
 */
void convert(char *cfgfile, char *outfile, char *weightfile)
{
    network net = parse_network_cfg(cfgfile);

    if(weightfile != 0) load_weights(&net, weightfile);

    save_network(net, outfile);
}
/*
 * Parses the network in cfgfile, optionally loads weightfile, hands the
 * network to visualize_network, then blocks in cvWaitKey(0).
 * weightfile may be 0 to visualize the freshly parsed network.
 */
void visualize(char *cfgfile, char *weightfile)
{
    network net = parse_network_cfg(cfgfile);

    if(weightfile != 0) load_weights(&net, weightfile);

    visualize_network(net);
    cvWaitKey(0);
}
int main(int argc, char **argv)
{
//test_convolutional_layer();
@ -90,11 +109,21 @@ int main(int argc, char **argv)
#endif
if(0==strcmp(argv[1], "imagenet")){
run_imagenet(argc, argv);
run_imagenet(argc, argv);
} else if (0 == strcmp(argv[1], "detection")){
run_detection(argc, argv);
run_detection(argc, argv);
} else if (0 == strcmp(argv[1], "captcha")){
run_captcha(argc, argv);
run_captcha(argc, argv);
} else if (0 == strcmp(argv[1], "change")){
change_rate(argv[2], atof(argv[3]), (argc > 4) ? atof(argv[4]) : 0);
} else if (0 == strcmp(argv[1], "convert")){
convert(argv[2], argv[3], (argc > 4) ? argv[4] : 0);
} else if (0 == strcmp(argv[1], "partial")){
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "visualize")){
visualize(argv[2], (argc > 3) ? argv[3] : 0);
} else {
fprintf(stderr, "Not an option: %s\n", argv[1]);
}
return 0;
}

View File

@ -90,6 +90,7 @@ void fill_truth_detection(char *path, float *truth, int classes, int height, int
float dh = (y - j*box_height)/box_height;
//printf("%d %d %d %f %f\n", id, i, j, dh, dw);
int index = (i+j*num_width)*(4+classes);
if(truth[index+classes]) continue;
truth[index+id] = 1;
index += classes;
truth[index++] = dh;
@ -148,6 +149,16 @@ data load_data_captcha(char **paths, int n, int m, int k, int h, int w)
return d;
}
/*
 * Loads n images at h x w and returns them as a data set whose targets are
 * the inputs themselves.
 *
 * paths - candidate image paths.
 * n     - number of images to load.
 * m     - if non-zero, n paths are drawn with get_random_paths(paths, n, m)
 *         and that temporary array is freed before returning; if zero, the
 *         first n entries of `paths` are used as given.
 * h, w  - passed to load_image_paths.
 *
 * The returned d.y is a copy of the d.X struct, so the two share the same
 * storage: free it once, through d.X only.
 */
data load_data_captcha_encode(char **paths, int n, int m, int h, int w)
{
    if(m) paths = get_random_paths(paths, n, m);
    /* Zero-initialise so fields not assigned below are not returned indeterminate. */
    data d = {0};
    d.shallow = 0;
    d.X = load_image_paths(paths, n, h, w);
    d.y = d.X;
    if(m) free(paths);
    return d;
}
void fill_truth(char *path, char **labels, int k, float *truth)
{

View File

@ -16,6 +16,7 @@ void free_data(data d);
void print_letters(float *pred, int n);
data load_data_captcha(char **paths, int n, int m, int k, int h, int w);
data load_data_captcha_encode(char **paths, int n, int m, int h, int w);
data load_data(char **paths, int n, int m, char **labels, int k, int h, int w);
pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d);

View File

@ -53,13 +53,10 @@ void forward_detection_layer(const detection_layer layer, float *in, float *trut
layer.output[out_i++] = scale*in[in_i++];
}
softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes);
activate_array(layer.output+out_i, layer.coords, SIGMOID);
activate_array(in+in_i, layer.coords, SIGMOID);
for(j = 0; j < layer.coords; ++j){
layer.output[out_i++] = mask*in[in_i++];
}
//printf("%d\n", mask);
//for(j = 0; j < layer.classes+layer.coords; ++j) printf("%f ", layer.output[i*(layer.classes+layer.coords)+j]);
//printf ("\n");
}
}
@ -78,10 +75,10 @@ void backward_detection_layer(const detection_layer layer, float *in, float *del
delta[in_i++] = scale*layer.delta[out_i++];
}
gradient_array(layer.output + out_i, layer.coords, SIGMOID, layer.delta + out_i);
for(j = 0; j < layer.coords; ++j){
delta[in_i++] = layer.delta[out_i++];
}
gradient_array(in + in_i - layer.coords, layer.coords, SIGMOID, layer.delta + out_i - layer.coords);
if(layer.rescore) delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta;
}
}

View File

@ -775,7 +775,7 @@ void save_weights(network net, char *filename)
fclose(fp);
}
void load_weights(network *net, char *filename)
void load_weights_upto(network *net, char *filename, int cutoff)
{
fprintf(stderr, "Loading weights from %s\n", filename);
FILE *fp = fopen(filename, "r");
@ -788,7 +788,7 @@ void load_weights(network *net, char *filename)
set_learning_network(net, net->learning_rate, net->momentum, net->decay);
int i;
for(i = 0; i < net->n; ++i){
for(i = 0; i < net->n && i < cutoff; ++i){
if(net->types[i] == CONVOLUTIONAL){
convolutional_layer layer = *(convolutional_layer *) net->layers[i];
int num = layer.n*layer.c*layer.size*layer.size;
@ -825,6 +825,11 @@ void load_weights(network *net, char *filename)
fclose(fp);
}
/*
 * Loads weights from filename into net, using the network's own layer
 * count as the cutoff passed to load_weights_upto.
 */
void load_weights(network *net, char *filename)
{
    int every_layer = net->n;
    load_weights_upto(net, filename, every_layer);
}
void save_network(network net, char *filename)
{
FILE *fp = fopen(filename, "w");