hope i didn't break anything

This commit is contained in:
Joseph Redmon 2016-06-02 15:25:24 -07:00
parent 881d6ee9b6
commit ec3d050a76
17 changed files with 834 additions and 550 deletions

View File

@ -3,7 +3,7 @@ CUDNN=0
OPENCV=0 OPENCV=0
DEBUG=0 DEBUG=0
ARCH= --gpu-architecture=compute_20 --gpu-code=compute_20 ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
VPATH=./src/ VPATH=./src/
EXEC=darknet EXEC=darknet

View File

@ -14,7 +14,6 @@ power=4
max_batches=500000 max_batches=500000
[convolutional] [convolutional]
batch_normalize=1
filters=16 filters=16
size=3 size=3
stride=1 stride=1
@ -26,7 +25,6 @@ size=2
stride=2 stride=2
[convolutional] [convolutional]
batch_normalize=1
filters=32 filters=32
size=3 size=3
stride=1 stride=1
@ -38,7 +36,6 @@ size=2
stride=2 stride=2
[convolutional] [convolutional]
batch_normalize=1
filters=64 filters=64
size=3 size=3
stride=1 stride=1
@ -50,7 +47,6 @@ size=2
stride=2 stride=2
[convolutional] [convolutional]
batch_normalize=1
filters=128 filters=128
size=3 size=3
stride=1 stride=1
@ -62,7 +58,6 @@ size=2
stride=2 stride=2
[convolutional] [convolutional]
batch_normalize=1
filters=256 filters=256
size=3 size=3
stride=1 stride=1
@ -74,7 +69,6 @@ size=2
stride=2 stride=2
[convolutional] [convolutional]
batch_normalize=1
filters=512 filters=512
size=3 size=3
stride=1 stride=1
@ -86,7 +80,6 @@ size=2
stride=2 stride=2
[convolutional] [convolutional]
batch_normalize=1
filters=1024 filters=1024
size=3 size=3
stride=1 stride=1

9
cfg/imagenet1k.dataset Normal file
View File

@ -0,0 +1,9 @@
classes=1000
labels = data/inet.labels.list
names = data/shortnames.txt
train = /data/imagenet/imagenet1k.train.list
valid = /data/imagenet/imagenet1k.valid.list
top=5
test = /Users/pjreddie/Documents/sites/selfie/paths.list
backup = /home/pjreddie/backup/

View File

@ -38,7 +38,7 @@ list *read_data_cfg(char *filename)
return options; return options;
} }
void train_classifier(char *datacfg, char *cfgfile, char *weightfile) void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
{ {
data_seed = time(0); data_seed = time(0);
srand(time(0)); srand(time(0));
@ -49,6 +49,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile)
if(weightfile){ if(weightfile){
load_weights(&net, weightfile); load_weights(&net, weightfile);
} }
if(clear) *net.seen = 0;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = net.batch; int imgs = net.batch;
@ -96,14 +97,14 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile)
printf("Loaded: %lf seconds\n", sec(clock()-time)); printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock(); time=clock();
/* /*
int u; int u;
for(u = 0; u < net.batch; ++u){ for(u = 0; u < net.batch; ++u){
image im = float_to_image(net.w, net.h, 3, train.X.vals[u]); image im = float_to_image(net.w, net.h, 3, train.X.vals[u]);
show_image(im, "loaded"); show_image(im, "loaded");
cvWaitKey(0); cvWaitKey(0);
} }
*/ */
float loss = train_network(net, train); float loss = train_network(net, train);
if(avg_loss == -1) avg_loss = loss; if(avg_loss == -1) avg_loss = loss;
@ -116,7 +117,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile)
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
save_weights(net, buff); save_weights(net, buff);
} }
if(*net.seen%100 == 0){ if(get_current_batch(net)%100 == 0){
char buff[256]; char buff[256];
sprintf(buff, "%s/%s.backup",backup_directory,base); sprintf(buff, "%s/%s.backup",backup_directory,base);
save_weights(net, buff); save_weights(net, buff);
@ -378,8 +379,8 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
//cvWaitKey(0); //cvWaitKey(0);
float *pred = network_predict(net, crop.data); float *pred = network_predict(net, crop.data);
if(resized.data != im.data) free_image(resized);
free_image(im); free_image(im);
free_image(resized);
free_image(crop); free_image(crop);
top_k(pred, classes, topk, indexes); top_k(pred, classes, topk, indexes);
@ -441,7 +442,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
flip_image(r); flip_image(r);
p = network_predict(net, r.data); p = network_predict(net, r.data);
axpy_cpu(classes, 1, p, 1, pred, 1); axpy_cpu(classes, 1, p, 1, pred, 1);
free_image(r); if(r.data != im.data) free_image(r);
} }
free_image(im); free_image(im);
top_k(pred, classes, topk, indexes); top_k(pred, classes, topk, indexes);
@ -501,6 +502,46 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
} }
} }
/*
 * Print the highest-scoring label for every image named in the "test" list
 * of a data config.
 *
 * datacfg    - data config file; the keys "names", "test" and "classes" are read.
 * filename   - network cfg file handed to parse_network_cfg.
 * weightfile - optional weights to load; skipped when NULL.
 *
 * Each image is resized with resize_min, cropped around its centre to
 * net.w x net.h, run through the network, and the label at the arg-max of
 * the first `classes` outputs is written to stdout, one per line.
 */
void label_classifier(char *datacfg, char *filename, char *weightfile)
{
    network net = parse_network_cfg(filename);
    set_batch_network(&net, 1);
    if(weightfile) load_weights(&net, weightfile);
    srand(time(0));

    /* Option lookups stay in the original order: names, test, classes. */
    list *options = read_data_cfg(datacfg);
    char *name_file = option_find_str(options, "names", "data/labels.list");
    char *image_file = option_find_str(options, "test", "data/train.list");
    int classes = option_find_int(options, "classes", 2);

    char **labels = get_labels(name_file);
    list *path_list = get_paths(image_file);
    char **paths = (char **)list_to_array(path_list);
    int total = path_list->size;
    free_list(path_list);

    int idx = 0;
    while(idx < total){
        image im = load_image_color(paths[idx], 0, 0);
        image resized = resize_min(im, net.w);
        int left = (resized.w - net.w)/2;
        int top = (resized.h - net.h)/2;
        image crop = crop_image(resized, left, top, net.w, net.h);
        float *pred = network_predict(net, crop.data);

        /* resize_min may hand back the same buffer as `im`; free it only once. */
        if(resized.data != im.data) free_image(resized);
        free_image(im);
        free_image(crop);

        printf("%s\n", labels[max_index(pred, classes)]);
        ++idx;
    }
}
void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer)
{ {
int curr = 0; int curr = 0;
@ -649,6 +690,7 @@ void run_classifier(int argc, char **argv)
} }
int cam_index = find_int_arg(argc, argv, "-c", 0); int cam_index = find_int_arg(argc, argv, "-c", 0);
int clear = find_arg(argc, argv, "-clear");
char *data = argv[3]; char *data = argv[3];
char *cfg = argv[4]; char *cfg = argv[4];
char *weights = (argc > 5) ? argv[5] : 0; char *weights = (argc > 5) ? argv[5] : 0;
@ -656,9 +698,10 @@ void run_classifier(int argc, char **argv)
char *layer_s = (argc > 7) ? argv[7]: 0; char *layer_s = (argc > 7) ? argv[7]: 0;
int layer = layer_s ? atoi(layer_s) : -1; int layer = layer_s ? atoi(layer_s) : -1;
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename); if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename);
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights); else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, clear);
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_classifier(data, cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_classifier(data, cfg, weights);
else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights);
else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights);

View File

@ -161,6 +161,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
l.filter_updates_gpu); l.filter_updates_gpu);
if(state.delta){ if(state.delta){
if(l.binary || l.xnor) swap_binary(&l);
cudnnConvolutionBackwardData(cudnn_handle(), cudnnConvolutionBackwardData(cudnn_handle(),
&one, &one,
l.filterDesc, l.filterDesc,
@ -174,6 +175,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
&one, &one,
l.dsrcTensorDesc, l.dsrcTensorDesc,
state.delta); state.delta);
if(l.binary || l.xnor) swap_binary(&l);
} }
#else #else

View File

@ -88,8 +88,8 @@ image get_convolutional_delta(convolutional_layer l)
return float_to_image(w,h,c,l.delta); return float_to_image(w,h,c,l.delta);
} }
#ifdef CUDNN
size_t get_workspace_size(layer l){ size_t get_workspace_size(layer l){
#ifdef CUDNN
size_t most = 0; size_t most = 0;
size_t s = 0; size_t s = 0;
cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(),
@ -117,8 +117,10 @@ size_t get_workspace_size(layer l){
&s); &s);
if (s > most) most = s; if (s > most) most = s;
return most; return most;
#else
return (size_t)l.out_h*l.out_w*l.size*l.size*l.c*sizeof(float);
#endif
} }
#endif
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor) convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor)
{ {
@ -154,8 +156,6 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
l.outputs = l.out_h * l.out_w * l.out_c; l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = l.w * l.h * l.c; l.inputs = l.w * l.h * l.c;
l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
l.workspace_size = out_h*out_w*size*size*c*sizeof(float);
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
@ -255,10 +255,9 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
0, 0,
&l.bf_algo); &l.bf_algo);
#endif
#endif
l.workspace_size = get_workspace_size(l); l.workspace_size = get_workspace_size(l);
#endif
#endif
l.activation = activation; l.activation = activation;
fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
@ -315,8 +314,6 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
l->outputs = l->out_h * l->out_w * l->out_c; l->outputs = l->out_h * l->out_w * l->out_c;
l->inputs = l->w * l->h * l->c; l->inputs = l->w * l->h * l->c;
l->col_image = realloc(l->col_image,
out_h*out_w*l->size*l->size*l->c*sizeof(float));
l->output = realloc(l->output, l->output = realloc(l->output,
l->batch*out_h * out_w * l->n*sizeof(float)); l->batch*out_h * out_w * l->n*sizeof(float));
l->delta = realloc(l->delta, l->delta = realloc(l->delta,
@ -328,7 +325,43 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
l->delta_gpu = cuda_make_array(l->delta, l->batch*out_h*out_w*l->n); l->delta_gpu = cuda_make_array(l->delta, l->batch*out_h*out_w*l->n);
l->output_gpu = cuda_make_array(l->output, l->batch*out_h*out_w*l->n); l->output_gpu = cuda_make_array(l->output, l->batch*out_h*out_w*l->n);
#ifdef CUDNN
cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
cudnnSetFilter4dDescriptor(l->dfilterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
cudnnSetFilter4dDescriptor(l->filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
int padding = l->pad ? l->size/2 : 0;
cudnnSetConvolution2dDescriptor(l->convDesc, padding, padding, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
l->srcTensorDesc,
l->filterDesc,
l->convDesc,
l->dstTensorDesc,
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
0,
&l->fw_algo);
cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
l->filterDesc,
l->ddstTensorDesc,
l->convDesc,
l->dsrcTensorDesc,
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
0,
&l->bd_algo);
cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
l->srcTensorDesc,
l->ddstTensorDesc,
l->convDesc,
l->dfilterDesc,
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
0,
&l->bf_algo);
#endif
#endif #endif
l->workspace_size = get_workspace_size(*l);
} }
void add_bias(float *output, float *biases, int batch, int n, int size) void add_bias(float *output, float *biases, int batch, int n, int size)
@ -386,7 +419,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
int n = out_h*out_w; int n = out_h*out_w;
char *a = l.cfilters; char *a = l.cfilters;
float *b = l.col_image; float *b = state.workspace;
float *c = l.output; float *c = l.output;
for(i = 0; i < l.batch; ++i){ for(i = 0; i < l.batch; ++i){
@ -407,7 +440,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
int n = out_h*out_w; int n = out_h*out_w;
float *a = l.filters; float *a = l.filters;
float *b = l.col_image; float *b = state.workspace;
float *c = l.output; float *c = l.output;
for(i = 0; i < l.batch; ++i){ for(i = 0; i < l.batch; ++i){
@ -439,7 +472,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
for(i = 0; i < l.batch; ++i){ for(i = 0; i < l.batch; ++i){
float *a = l.delta + i*m*k; float *a = l.delta + i*m*k;
float *b = l.col_image; float *b = state.workspace;
float *c = l.filter_updates; float *c = l.filter_updates;
float *im = state.input+i*l.c*l.h*l.w; float *im = state.input+i*l.c*l.h*l.w;
@ -451,11 +484,11 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
if(state.delta){ if(state.delta){
a = l.filters; a = l.filters;
b = l.delta + i*m*k; b = l.delta + i*m*k;
c = l.col_image; c = state.workspace;
gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);
col2im_cpu(l.col_image, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w); col2im_cpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
} }
} }
} }

View File

@ -270,6 +270,8 @@ int main(int argc, char **argv)
run_dice(argc, argv); run_dice(argc, argv);
} else if (0 == strcmp(argv[1], "writing")){ } else if (0 == strcmp(argv[1], "writing")){
run_writing(argc, argv); run_writing(argc, argv);
} else if (0 == strcmp(argv[1], "3d")){
composite_3d(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "test")){ } else if (0 == strcmp(argv[1], "test")){
test_resize(argv[2]); test_resize(argv[2]);
} else if (0 == strcmp(argv[1], "captcha")){ } else if (0 == strcmp(argv[1], "captcha")){

View File

@ -271,7 +271,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
free(boxes); free(boxes);
} }
void fill_truth_detection(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
{ {
char *labelpath = find_replace(path, "images", "labels"); char *labelpath = find_replace(path, "images", "labels");
labelpath = find_replace(labelpath, "JPEGImages", "labels"); labelpath = find_replace(labelpath, "JPEGImages", "labels");
@ -283,7 +283,7 @@ void fill_truth_detection(char *path, float *truth, int classes, int flip, float
box_label *boxes = read_boxes(labelpath, &count); box_label *boxes = read_boxes(labelpath, &count);
randomize_boxes(boxes, count); randomize_boxes(boxes, count);
correct_boxes(boxes, count, dx, dy, sx, sy, flip); correct_boxes(boxes, count, dx, dy, sx, sy, flip);
if(count > 17) count = 17; if(count > num_boxes) count = num_boxes;
float x,y,w,h; float x,y,w,h;
int id; int id;
int i; int i;
@ -297,11 +297,11 @@ void fill_truth_detection(char *path, float *truth, int classes, int flip, float
if (w < .01 || h < .01) continue; if (w < .01 || h < .01) continue;
truth[i*5] = id; truth[i*5+0] = id;
truth[i*5+2] = x; truth[i*5+1] = x;
truth[i*5+3] = y; truth[i*5+2] = y;
truth[i*5+4] = w; truth[i*5+3] = w;
truth[i*5+5] = h; truth[i*5+4] = h;
} }
free(boxes); free(boxes);
} }
@ -601,7 +601,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
return d; return d;
} }
data load_data_detection(int n, int boxes, char **paths, int m, int w, int h, int classes, float jitter) data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter)
{ {
char **random_paths = get_random_paths(paths, n, m); char **random_paths = get_random_paths(paths, n, m);
int i; int i;
@ -643,7 +643,7 @@ data load_data_detection(int n, int boxes, char **paths, int m, int w, int h, in
if(flip) flip_image(sized); if(flip) flip_image(sized);
d.X.vals[i] = sized.data; d.X.vals[i] = sized.data;
fill_truth_detection(random_paths[i], d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy); fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
free_image(orig); free_image(orig);
free_image(cropped); free_image(cropped);
@ -669,12 +669,12 @@ void *load_thread(void *ptr)
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size); *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
} else if (a.type == STUDY_DATA){ } else if (a.type == STUDY_DATA){
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size); *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
} else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.num_boxes, a.paths, a.m, a.classes, a.w, a.h, a.background);
} else if (a.type == WRITING_DATA){ } else if (a.type == WRITING_DATA){
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
} else if (a.type == REGION_DATA){ } else if (a.type == REGION_DATA){
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter); *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
} else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
} else if (a.type == SWAG_DATA){ } else if (a.type == SWAG_DATA){
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
} else if (a.type == COMPARE_DATA){ } else if (a.type == COMPARE_DATA){

View File

@ -70,7 +70,7 @@ void print_letters(float *pred, int n);
data load_data_captcha(char **paths, int n, int m, int k, int w, int h); data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h); data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h); data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, int boxes, char **paths, int m, int w, int h, int classes, float jitter); data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size); data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size); data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size); data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size);

File diff suppressed because it is too large Load Diff

View File

@ -44,6 +44,7 @@ void saturate_exposure_image(image im, float sat, float exposure);
void hsv_to_rgb(image im); void hsv_to_rgb(image im);
void rgbgr_image(image im); void rgbgr_image(image im);
void constrain_image(image im); void constrain_image(image im);
void composite_3d(char *f1, char *f2, char *out);
image grayscale_image(image im); image grayscale_image(image im);
image threshold_image(image im, float thresh); image threshold_image(image im, float thresh);

View File

@ -50,6 +50,7 @@ struct layer{
int h,w,c; int h,w,c;
int out_h, out_w, out_c; int out_h, out_w, out_c;
int n; int n;
int max_boxes;
int groups; int groups;
int size; int size;
int side; int side;

View File

@ -137,6 +137,7 @@ network make_network(int n)
void forward_network(network net, network_state state) void forward_network(network net, network_state state)
{ {
state.workspace = net.workspace;
int i; int i;
for(i = 0; i < net.n; ++i){ for(i = 0; i < net.n; ++i){
state.index = i; state.index = i;
@ -400,6 +401,7 @@ int resize_network(network *net, int w, int h)
net->w = w; net->w = w;
net->h = h; net->h = h;
int inputs = 0; int inputs = 0;
size_t workspace_size = 0;
//fprintf(stderr, "Resizing to %d x %d...", w, h); //fprintf(stderr, "Resizing to %d x %d...", w, h);
//fflush(stderr); //fflush(stderr);
for (i = 0; i < net->n; ++i){ for (i = 0; i < net->n; ++i){
@ -419,12 +421,20 @@ int resize_network(network *net, int w, int h)
}else{ }else{
error("Cannot resize this type of layer"); error("Cannot resize this type of layer");
} }
if(l.workspace_size > workspace_size) workspace_size = l.workspace_size;
inputs = l.outputs; inputs = l.outputs;
net->layers[i] = l; net->layers[i] = l;
w = l.out_w; w = l.out_w;
h = l.out_h; h = l.out_h;
if(l.type == AVGPOOL) break; if(l.type == AVGPOOL) break;
} }
/*
 * Re-create the shared scratch buffer for the largest per-layer workspace.
 * workspace_size is a byte count: the GPU allocator takes a float count
 * (rounded up), while calloc on the CPU path must receive the byte count
 * itself. When no layer needs a workspace, leave the pointer NULL instead of
 * letting (workspace_size-1) wrap around in size_t arithmetic.
 */
#ifdef GPU
cuda_free(net->workspace);
net->workspace = workspace_size ? cuda_make_array(0, (workspace_size-1)/sizeof(float)+1) : 0;
#else
free(net->workspace);
net->workspace = workspace_size ? calloc(1, workspace_size) : 0;
#endif
//fprintf(stderr, " Done!\n"); //fprintf(stderr, " Done!\n");
return 0; return 0;
} }

View File

@ -257,6 +257,7 @@ detection_layer parse_detection(list *options, size_params params)
layer.softmax = option_find_int(options, "softmax", 0); layer.softmax = option_find_int(options, "softmax", 0);
layer.sqrt = option_find_int(options, "sqrt", 0); layer.sqrt = option_find_int(options, "sqrt", 0);
layer.max_boxes = option_find_int_quiet(options, "max",30);
layer.coord_scale = option_find_float(options, "coord_scale", 1); layer.coord_scale = option_find_float(options, "coord_scale", 1);
layer.forced = option_find_int(options, "forced", 0); layer.forced = option_find_int(options, "forced", 0);
layer.object_scale = option_find_float(options, "object_scale", 1); layer.object_scale = option_find_float(options, "object_scale", 1);
@ -600,8 +601,11 @@ network parse_network_cfg(char *filename)
net.outputs = get_network_output_size(net); net.outputs = get_network_output_size(net);
net.output = get_network_output(net); net.output = get_network_output(net);
if(workspace_size){ if(workspace_size){
//printf("%ld\n", workspace_size);
#ifdef GPU #ifdef GPU
net.workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); net.workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
#else
net.workspace = calloc(1, workspace_size);
#endif #endif
} }
return net; return net;

100
src/rnn.c
View File

@ -280,6 +280,104 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
printf("\n"); printf("\n");
} }
void test_tactic_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file)
{
char **tokens = 0;
if(token_file){
size_t n;
tokens = read_tokens(token_file, &n);
}
srand(rseed);
char *base = basecfg(cfgfile);
fprintf(stderr, "%s\n", base);
network net = parse_network_cfg(cfgfile);
if(weightfile){
load_weights(&net, weightfile);
}
int inputs = get_network_input_size(net);
int i, j;
for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
int c = 0;
int len = strlen(seed);
float *input = calloc(inputs, sizeof(float));
float *out;
while((c = getc(stdin)) != EOF){
input[c] = 1;
out = network_predict(net, input);
input[c] = 0;
}
for(i = 0; i < num; ++i){
for(j = 0; j < inputs; ++j){
if (out[j] < .0001) out[j] = 0;
}
int next = sample_array(out, inputs);
if(c == '.' && next == '\n') break;
c = next;
print_symbol(c, tokens);
input[c] = 1;
out = network_predict(net, input);
input[c] = 0;
}
printf("\n");
}
void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed)
{
char *base = basecfg(cfgfile);
fprintf(stderr, "%s\n", base);
network net = parse_network_cfg(cfgfile);
if(weightfile){
load_weights(&net, weightfile);
}
int inputs = get_network_input_size(net);
int count = 0;
int words = 1;
int c;
int len = strlen(seed);
float *input = calloc(inputs, sizeof(float));
int i;
for(i = 0; i < len; ++i){
c = seed[i];
input[(int)c] = 1;
network_predict(net, input);
input[(int)c] = 0;
}
float sum = 0;
c = getc(stdin);
float log2 = log(2);
int in = 0;
while(c != EOF){
int next = getc(stdin);
if(next == EOF) break;
if(next < 0 || next >= 255) error("Out of range character");
input[c] = 1;
float *out = network_predict(net, input);
input[c] = 0;
if(c == '.' && next == '\n') in = 0;
if(!in) {
if(c == '>' && next == '>'){
in = 1;
++words;
}
c = next;
continue;
}
++count;
sum += log(out[next])/log2;
c = next;
printf("%d %d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words));
}
}
void valid_char_rnn(char *cfgfile, char *weightfile, char *seed) void valid_char_rnn(char *cfgfile, char *weightfile, char *seed)
{ {
char *base = basecfg(cfgfile); char *base = basecfg(cfgfile);
@ -389,6 +487,8 @@ void run_char_rnn(int argc, char **argv)
char *weights = (argc > 4) ? argv[4] : 0; char *weights = (argc > 4) ? argv[4] : 0;
if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized);
else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed);
else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed);
else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed);
else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens);
else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, seed, temp, rseed, tokens);
} }

View File

@ -424,6 +424,13 @@ float variance_array(float *a, int n)
return variance; return variance;
} }
/*
 * Clamp `a` to [min, max].
 * The lower bound is tested first, so when min > max any value below `min`
 * yields `min`.
 */
int constrain_int(int a, int min, int max)
{
    return (a < min) ? min
         : (a > max) ? max
         : a;
}
float constrain(float min, float max, float a) float constrain(float min, float max, float a)
{ {
if (a < min) return min; if (a < min) return min;
@ -431,6 +438,14 @@ float constrain(float min, float max, float a)
return a; return a;
} }
float dist_array(float *a, float *b, int n, int sub)
{
int i;
float sum = 0;
for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2);
return sqrt(sum);
}
float mse_array(float *a, int n) float mse_array(float *a, int n)
{ {
int i; int i;

View File

@ -36,6 +36,7 @@ void scale_array(float *a, int n, float s);
void translate_array(float *a, int n, float s); void translate_array(float *a, int n, float s);
int max_index(float *a, int n); int max_index(float *a, int n);
float constrain(float min, float max, float a); float constrain(float min, float max, float a);
int constrain_int(int a, int min, int max);
float mse_array(float *a, int n); float mse_array(float *a, int n);
float rand_normal(); float rand_normal();
size_t rand_size_t(); size_t rand_size_t();
@ -46,6 +47,7 @@ float mean_array(float *a, int n);
void mean_arrays(float **a, int n, int els, float *avg); void mean_arrays(float **a, int n, int els, float *avg);
float variance_array(float *a, int n); float variance_array(float *a, int n);
float mag_array(float *a, int n); float mag_array(float *a, int n);
float dist_array(float *a, float *b, int n, int sub);
float **one_hot_encode(float *a, int n, int k); float **one_hot_encode(float *a, int n, int k);
float sec(clock_t clocks); float sec(clock_t clocks);
int find_int_arg(int argc, char **argv, char *arg, int def); int find_int_arg(int argc, char **argv, char *arg, int def);