mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
detection layer fixed
This commit is contained in:
parent
2313a8eb54
commit
655f636a42
@ -7,7 +7,6 @@ __device__ float linear_activate_kernel(float x){return x;}
|
||||
// Element-wise activation helpers callable from device code.
// Arithmetic uses float literals and the float math functions so that
// intermediate values are not promoted to double.

// Logistic sigmoid: 1 / (1 + e^-x).
__device__ float sigmoid_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
// Rectifier written as a product: x when x > 0, otherwise x*0.
__device__ float relu_activate_kernel(float x){return x*(x>0);}
// Rectifier plus an extra 0.1*x linear term.
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
//__device__ float ramp_activate_kernel(float x){return 0;}
// Hyperbolic tangent. tanhf saturates to +/-1 for large |x|; the explicit
// (e^2x - 1)/(e^2x + 1) form evaluates to inf/inf = NaN once e^2x overflows.
__device__ float tanh_activate_kernel(float x){return tanhf(x);}

// Derivative of the identity activation: always 1.
__device__ float linear_gradient_kernel(float x){return 1;}
|
||||
|
@ -35,19 +35,6 @@ ACTIVATION get_activation(char *s)
|
||||
return RELU;
|
||||
}
|
||||
|
||||
float linear_activate(float x){return x;}
|
||||
float sigmoid_activate(float x){return 1./(1. + exp(-x));}
|
||||
float relu_activate(float x){return x*(x>0);}
|
||||
float ramp_activate(float x){return x*(x>0)+.1*x;}
|
||||
float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
|
||||
//float tanh_activate(float x){return x - (x*x*x)/3;}
|
||||
|
||||
// Scalar activation derivatives (host side).
// sigmoid_gradient and tanh_gradient compute x*(1-x) and 1-x*x on their argument;
// NOTE(review): presumably x is the already-activated output rather than the pre-activation input — confirm against callers.

// Derivative of the identity: always 1.
float linear_gradient(float x)
{
    return 1;
}

// x * (1 - x).
float sigmoid_gradient(float x)
{
    return x*(1-x);
}

// 1 for x > 0, otherwise 0.
float relu_gradient(float x)
{
    return (x > 0) ? 1 : 0;
}

// 1.1 for x > 0, otherwise 0.1.
float ramp_gradient(float x)
{
    return ((x > 0) ? 1 : 0) + .1;
}

// 1 - x^2.
float tanh_gradient(float x)
{
    return 1 - x*x;
}
|
||||
|
||||
float activate(float x, ACTIVATION a)
|
||||
{
|
||||
switch(a){
|
||||
|
@ -18,5 +18,17 @@ void activate_array_ongpu(float *x, int n, ACTIVATION a);
|
||||
void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta);
|
||||
#endif
|
||||
|
||||
static inline float linear_activate(float x){return x;}
|
||||
static inline float sigmoid_activate(float x){return 1./(1. + exp(-x));}
|
||||
static inline float relu_activate(float x){return x*(x>0);}
|
||||
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
|
||||
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
|
||||
|
||||
// Scalar activation derivatives, defined static inline in the header.
// sigmoid_gradient and tanh_gradient compute x*(1-x) and 1-x*x on their argument;
// NOTE(review): presumably x is the already-activated output rather than the pre-activation input — confirm against callers.

// Derivative of the identity: always 1.
static inline float linear_gradient(float x)
{
    return 1;
}

// x * (1 - x).
static inline float sigmoid_gradient(float x)
{
    return x*(1-x);
}

// 1 for x > 0, otherwise 0.
static inline float relu_gradient(float x)
{
    return (x > 0) ? 1 : 0;
}

// 1.1 for x > 0, otherwise 0.1.
static inline float ramp_gradient(float x)
{
    return ((x > 0) ? 1 : 0) + .1;
}

// 1 - x^2.
static inline float tanh_gradient(float x)
{
    return 1 - x*x;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -42,6 +42,68 @@ void train_captcha(char *cfgfile, char *weightfile)
|
||||
}
|
||||
}
|
||||
|
||||
void decode_captcha(char *cfgfile, char *weightfile)
|
||||
{
|
||||
setbuf(stdout, NULL);
|
||||
srand(time(0));
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
set_batch_network(&net, 1);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
char filename[256];
|
||||
while(1){
|
||||
printf("Enter filename: ");
|
||||
fgets(filename, 256, stdin);
|
||||
strtok(filename, "\n");
|
||||
image im = load_image_color(filename, 60, 200);
|
||||
scale_image(im, 1./255.);
|
||||
float *X = im.data;
|
||||
float *predictions = network_predict(net, X);
|
||||
image out = float_to_image(60, 200, 3, predictions);
|
||||
show_image(out, "decoded");
|
||||
cvWaitKey(0);
|
||||
free_image(im);
|
||||
}
|
||||
}
|
||||
|
||||
void encode_captcha(char *cfgfile, char *weightfile)
|
||||
{
|
||||
float avg_loss = -1;
|
||||
srand(time(0));
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = 1024;
|
||||
int i = net.seen/imgs;
|
||||
list *plist = get_paths("/data/captcha/encode.list");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
clock_t time;
|
||||
while(1){
|
||||
++i;
|
||||
time=clock();
|
||||
data train = load_data_captcha_encode(paths, imgs, plist->size, 60, 200);
|
||||
scale_data_rows(train, 1./255);
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
float loss = train_network(net, train);
|
||||
net.seen += imgs;
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
|
||||
free_matrix(train.X);
|
||||
if(i%100==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void validate_captcha(char *cfgfile, char *weightfile)
|
||||
{
|
||||
@ -115,6 +177,8 @@ void run_captcha(int argc, char **argv)
|
||||
char *weights = (argc > 4) ? argv[4] : 0;
|
||||
if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights);
|
||||
}
|
||||
|
||||
|
@ -13,15 +13,6 @@ extern void run_imagenet(int argc, char **argv);
|
||||
extern void run_detection(int argc, char **argv);
|
||||
extern void run_captcha(int argc, char **argv);
|
||||
|
||||
void convert(char *cfgfile, char *outfile, char *weightfile)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
save_network(net, outfile);
|
||||
}
|
||||
|
||||
void del_arg(int argc, char **argv, int index)
|
||||
{
|
||||
int i;
|
||||
@ -57,20 +48,48 @@ int find_int_arg(int argc, char **argv, char *arg, int def)
|
||||
return def;
|
||||
}
|
||||
|
||||
// Rewrites, in place, the float stored at the very start of `filename`:
// the stored learning rate r becomes r*scale + add.
// Does nothing (apart from reporting on stderr) if that float cannot be read.
void change_rate(char *filename, float scale, float add)
{
    // "r+b": the value is read and then overwritten in the same binary file.
    FILE *fp = fopen(filename, "r+b");
    // NOTE(review): file_error is expected not to return on failure — confirm.
    if(!fp) file_error(filename);
    float rate = 0;
    // If the read fails, rate would stay 0 and `add` would be written over the
    // start of the file, so bail out instead.
    if(fread(&rate, sizeof(float), 1, fp) != 1){
        fprintf(stderr, "Couldn't read learning rate from %s\n", filename);
        fclose(fp);
        return;
    }
    printf("Scaling learning rate from %f to %f\n", rate, rate*scale+add);
    rate = rate*scale + add;
    // Rewind so the new value replaces the old one rather than following it.
    fseek(fp, 0, SEEK_SET);
    fwrite(&rate, sizeof(float), 1, fp);
    fclose(fp);
}
|
||||
|
||||
void partial(char *cfgfile, char *weightfile, char *outfile, int max)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights_upto(&net, weightfile, max);
|
||||
}
|
||||
save_weights(net, outfile);
|
||||
}
|
||||
|
||||
void convert(char *cfgfile, char *outfile, char *weightfile)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
save_network(net, outfile);
|
||||
}
|
||||
|
||||
void visualize(char *cfgfile, char *weightfile)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
visualize_network(net);
|
||||
cvWaitKey(0);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
//test_convolutional_layer();
|
||||
@ -90,11 +109,21 @@ int main(int argc, char **argv)
|
||||
#endif
|
||||
|
||||
if(0==strcmp(argv[1], "imagenet")){
|
||||
run_imagenet(argc, argv);
|
||||
run_imagenet(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "detection")){
|
||||
run_detection(argc, argv);
|
||||
run_detection(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "captcha")){
|
||||
run_captcha(argc, argv);
|
||||
run_captcha(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "change")){
|
||||
change_rate(argv[2], atof(argv[3]), (argc > 4) ? atof(argv[4]) : 0);
|
||||
} else if (0 == strcmp(argv[1], "convert")){
|
||||
convert(argv[2], argv[3], (argc > 4) ? argv[4] : 0);
|
||||
} else if (0 == strcmp(argv[1], "partial")){
|
||||
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
|
||||
} else if (0 == strcmp(argv[1], "visualize")){
|
||||
visualize(argv[2], (argc > 3) ? argv[3] : 0);
|
||||
} else {
|
||||
fprintf(stderr, "Not an option: %s\n", argv[1]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
11
src/data.c
11
src/data.c
@ -90,6 +90,7 @@ void fill_truth_detection(char *path, float *truth, int classes, int height, int
|
||||
float dh = (y - j*box_height)/box_height;
|
||||
//printf("%d %d %d %f %f\n", id, i, j, dh, dw);
|
||||
int index = (i+j*num_width)*(4+classes);
|
||||
if(truth[index+classes]) continue;
|
||||
truth[index+id] = 1;
|
||||
index += classes;
|
||||
truth[index++] = dh;
|
||||
@ -148,6 +149,16 @@ data load_data_captcha(char **paths, int n, int m, int k, int h, int w)
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_captcha_encode(char **paths, int n, int m, int h, int w)
|
||||
{
|
||||
if(m) paths = get_random_paths(paths, n, m);
|
||||
data d;
|
||||
d.shallow = 0;
|
||||
d.X = load_image_paths(paths, n, h, w);
|
||||
d.y = d.X;
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
}
|
||||
|
||||
void fill_truth(char *path, char **labels, int k, float *truth)
|
||||
{
|
||||
|
@ -16,6 +16,7 @@ void free_data(data d);
|
||||
|
||||
void print_letters(float *pred, int n);
|
||||
data load_data_captcha(char **paths, int n, int m, int k, int h, int w);
|
||||
data load_data_captcha_encode(char **paths, int n, int m, int h, int w);
|
||||
data load_data(char **paths, int n, int m, char **labels, int k, int h, int w);
|
||||
pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d);
|
||||
|
||||
|
@ -53,13 +53,10 @@ void forward_detection_layer(const detection_layer layer, float *in, float *trut
|
||||
layer.output[out_i++] = scale*in[in_i++];
|
||||
}
|
||||
softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes);
|
||||
activate_array(layer.output+out_i, layer.coords, SIGMOID);
|
||||
activate_array(in+in_i, layer.coords, SIGMOID);
|
||||
for(j = 0; j < layer.coords; ++j){
|
||||
layer.output[out_i++] = mask*in[in_i++];
|
||||
}
|
||||
//printf("%d\n", mask);
|
||||
//for(j = 0; j < layer.classes+layer.coords; ++j) printf("%f ", layer.output[i*(layer.classes+layer.coords)+j]);
|
||||
//printf ("\n");
|
||||
}
|
||||
}
|
||||
|
||||
@ -78,10 +75,10 @@ void backward_detection_layer(const detection_layer layer, float *in, float *del
|
||||
delta[in_i++] = scale*layer.delta[out_i++];
|
||||
}
|
||||
|
||||
gradient_array(layer.output + out_i, layer.coords, SIGMOID, layer.delta + out_i);
|
||||
for(j = 0; j < layer.coords; ++j){
|
||||
delta[in_i++] = layer.delta[out_i++];
|
||||
}
|
||||
gradient_array(in + in_i - layer.coords, layer.coords, SIGMOID, layer.delta + out_i - layer.coords);
|
||||
if(layer.rescore) delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta;
|
||||
}
|
||||
}
|
||||
|
@ -775,7 +775,7 @@ void save_weights(network net, char *filename)
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
void load_weights(network *net, char *filename)
|
||||
void load_weights_upto(network *net, char *filename, int cutoff)
|
||||
{
|
||||
fprintf(stderr, "Loading weights from %s\n", filename);
|
||||
FILE *fp = fopen(filename, "r");
|
||||
@ -788,7 +788,7 @@ void load_weights(network *net, char *filename)
|
||||
set_learning_network(net, net->learning_rate, net->momentum, net->decay);
|
||||
|
||||
int i;
|
||||
for(i = 0; i < net->n; ++i){
|
||||
for(i = 0; i < net->n && i < cutoff; ++i){
|
||||
if(net->types[i] == CONVOLUTIONAL){
|
||||
convolutional_layer layer = *(convolutional_layer *) net->layers[i];
|
||||
int num = layer.n*layer.c*layer.size*layer.size;
|
||||
@ -825,6 +825,11 @@ void load_weights(network *net, char *filename)
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
// Loads weights from filename into net by delegating to load_weights_upto
// with the network's own layer count (net->n) as the cutoff.
void load_weights(network *net, char *filename)
{
    int all_layers = net->n;
    load_weights_upto(net, filename, all_layers);
}
|
||||
|
||||
void save_network(network net, char *filename)
|
||||
{
|
||||
FILE *fp = fopen(filename, "w");
|
||||
|
Loading…
Reference in New Issue
Block a user