From 5f4a5f59b072d4029107422d30b04941424c48b1 Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Mon, 23 Feb 2015 18:52:05 -0800 Subject: [PATCH] captcha stuff --- src/cost_layer.c | 4 +- src/darknet.c | 261 ++++++++++++++++++++++++++--------- src/data.c | 183 +++++++++++++----------- src/data.h | 7 +- src/image.c | 62 +++++++-- src/image.h | 4 +- src/network_kernels.cu | 5 + src/parser.c | 3 +- src/softmax_layer.c | 44 +++--- src/softmax_layer.h | 4 +- src/softmax_layer_kernels.cu | 4 +- src/utils.c | 11 ++ src/utils.h | 2 + 13 files changed, 414 insertions(+), 180 deletions(-) diff --git a/src/cost_layer.c b/src/cost_layer.c index a08562b0..34c8fb59 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -49,7 +49,7 @@ void forward_cost_layer(cost_layer layer, float *input, float *truth) if(layer.type == DETECTION){ int i; for(i = 0; i < layer.batch*layer.inputs; ++i){ - if((i%5) && !truth[(i/5)*5]) layer.delta[i] = 0; + if((i%25) && !truth[(i/25)*25]) layer.delta[i] = 0; } } *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); @@ -71,7 +71,7 @@ void forward_cost_layer_gpu(cost_layer layer, float * input, float * truth) axpy_ongpu(layer.batch*layer.inputs, -1, input, 1, layer.delta_gpu, 1); if(layer.type==DETECTION){ - mask_ongpu(layer.inputs*layer.batch, layer.delta_gpu, truth, 5); + mask_ongpu(layer.inputs*layer.batch, layer.delta_gpu, truth, 25); } cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs); diff --git a/src/darknet.c b/src/darknet.c index 92a91962..fc58f3d1 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -31,14 +31,17 @@ void test_parser() save_network(net, "cfg/trained_imagenet_smaller.cfg"); } +char *class_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; #define AMNT 3 void draw_detection(image im, float *box, int side) { + int classes = 20; + int elems = 4+classes+1; int j; int r, c; float amount[AMNT] = {0}; for(r = 0; r < side*side; ++r){ - float val = box[r*5]; + float val = box[r*elems]; for(j = 0; j < AMNT; ++j){ if(val > amount[j]) { float swap = val; @@ -51,21 +54,29 @@ void draw_detection(image im, float *box, int side) for(r = 0; r < side; ++r){ for(c = 0; c < side; ++c){ - j = (r*side + c) * 5; - printf("Prob: %f\n", box[j]); + j = (r*side + c) * elems; + //printf("%d\n", j); + //printf("Prob: %f\n", box[j]); if(box[j] >= smallest){ + int class = max_index(box+j+1, classes); + int z; + for(z = 0; z < classes; ++z) printf("%f %s\n", box[j+1+z], class_names[z]); + printf("%f %s\n", box[j+1+class], class_names[class]); + float red = get_color(0,class,classes); + float green = get_color(1,class,classes); + float blue = get_color(2,class,classes); + + j += classes; int d = im.w/side; int y = r*d+box[j+1]*d; int x = c*d+box[j+2]*d; int h = box[j+3]*im.h; int w = box[j+4]*im.w; - //printf("%f %f %f %f\n", box[j+1], box[j+2], box[j+3], box[j+4]); - //printf("%d %d %d %d\n", x, y, w, h); - //printf("%d %d %d %d\n", x-w/2, y-h/2, x+w/2, y+h/2); - draw_box(im, x-w/2, y-h/2, x+w/2, y+h/2); + draw_box(im, x-w/2, y-h/2, x+w/2, y+h/2,red,green,blue); } } } + //printf("Done\n"); show_image(im, "box"); cvWaitKey(0); } @@ -100,24 +111,24 @@ void train_detection_net(char *cfgfile, char *weightfile) srand(time(0)); //srand(23410); int i = net.seen/imgs; - list *plist = get_paths("/home/pjreddie/data/imagenet/horse_pos.txt"); + list *plist = get_paths("/home/pjreddie/data/voc/train.txt"); char **paths = (char **)list_to_array(plist); printf("%d\n", plist->size); data train, buffer; int im_dim = 512; int jitter = 64; - pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, im_dim, im_dim, 7, 7, jitter, &buffer); + pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, 20, im_dim, im_dim, 7, 7, jitter, &buffer); clock_t time; while(1){ i += 1; time=clock(); pthread_join(load_thread, 0); train = buffer; - load_thread = load_data_detection_thread(imgs, paths, plist->size, im_dim, im_dim, 7, 7, jitter, &buffer); + load_thread = load_data_detection_thread(imgs, paths, plist->size, 20, im_dim, im_dim, 7, 7, jitter, &buffer); - /* - image im = float_to_image(im_dim - jitter, im_dim-jitter, 3, train.X.vals[923]); - draw_detection(im, train.y.vals[923], 7); +/* + image im = float_to_image(im_dim - jitter, im_dim-jitter, 3, train.X.vals[0]); + draw_detection(im, train.y.vals[0], 7); show_image(im, "truth"); cvWaitKey(0); */ @@ -128,7 +139,7 @@ void train_detection_net(char *cfgfile, char *weightfile) net.seen += imgs; avg_loss = avg_loss*.9 + loss*.1; printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); - if(i%100==0){ + if(i%800==0){ char buff[256]; sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); save_weights(net, buff); @@ -146,17 +157,20 @@ void validate_detection_net(char *cfgfile, char *weightfile) fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); srand(time(0)); - list *plist = get_paths("/home/pjreddie/data/imagenet/detection.val"); + list *plist = get_paths("/home/pjreddie/data/voc/val.txt"); char **paths = (char **)list_to_array(plist); + int num_output = 1225; + int im_size = 448; + int classes = 20; int m = plist->size; int i = 0; - int splits = 50; + int splits = 100; int num = (i+1)*m/splits - i*m/splits; fprintf(stderr, "%d\n", m); data val, buffer; - pthread_t load_thread = load_data_thread(paths, num, 0, 0, 245, 224, 224, &buffer); + pthread_t load_thread = load_data_thread(paths, num, 0, 0, num_output, im_size, im_size, &buffer); clock_t time; for(i = 1; i <= splits; ++i){ time=clock(); @@ -165,23 +179,33 @@ void validate_detection_net(char *cfgfile, char *weightfile) num = (i+1)*m/splits - i*m/splits; char **part = paths+(i*m/splits); - if(i != splits) load_thread = load_data_thread(part, num, 0, 0, 245, 224, 224, &buffer); + if(i != splits) load_thread = load_data_thread(part, num, 0, 0, num_output, im_size, im_size, &buffer); - fprintf(stderr, "Loaded: %lf seconds\n", sec(clock()-time)); + fprintf(stderr, "%d: Loaded: %lf seconds\n", i, sec(clock()-time)); matrix pred = network_predict_data(net, val); - int j, k; + int j, k, class; for(j = 0; j < pred.rows; ++j){ - for(k = 0; k < pred.cols; k += 5){ - if (pred.vals[j][k] > .005){ - int index = k/5; + for(k = 0; k < pred.cols; k += classes+4+1){ + + /* + int z; + for(z = 0; z < 25; ++z) printf("%f, ", pred.vals[j][k+z]); + printf("\n"); + */ + + float p = pred.vals[j][k]; + //if (pred.vals[j][k] > .001){ + for(class = 0; class < classes; ++class){ + int index = (k)/(classes+4+1); int r = index/7; int c = index%7; - float y = (32.*(r + pred.vals[j][k+1]))/224.; - float x = (32.*(c + pred.vals[j][k+2]))/224.; - float h = (256.*(pred.vals[j][k+3]))/224.; - float w = (256.*(pred.vals[j][k+4]))/224.; - printf("%d %f %f %f %f %f\n", (i-1)*m/splits + j + 1, pred.vals[j][k], y, x, h, w); + float y = (r + pred.vals[j][k+1+classes])/7.; + float x = (c + pred.vals[j][k+2+classes])/7.; + float h = pred.vals[j][k+3+classes]; + float w = pred.vals[j][k+4+classes]; + printf("%d %d %f %f %f %f %f\n", (i-1)*m/splits + j, class, p*pred.vals[j][k+class+1], y, x, h, w); } + //} } } @@ -191,44 +215,44 @@ void validate_detection_net(char *cfgfile, char *weightfile) } /* -void train_imagenet_distributed(char *address) -{ - float avg_loss = 1; - srand(time(0)); - network net = parse_network_cfg("cfg/net.cfg"); - set_learning_network(&net, 0, 1, 0); - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = net.batch; - int i = 0; - char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list"); - list *plist = get_paths("/data/imagenet/cls.train.list"); - char **paths = (char **)list_to_array(plist); - printf("%d\n", plist->size); - clock_t time; - data train, buffer; - pthread_t load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer); - while(1){ - i += 1; + void train_imagenet_distributed(char *address) + { + float avg_loss = 1; + srand(time(0)); + network net = parse_network_cfg("cfg/net.cfg"); + set_learning_network(&net, 0, 1, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch; + int i = 0; + char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list"); + list *plist = get_paths("/data/imagenet/cls.train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + data train, buffer; + pthread_t load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer); + while(1){ + i += 1; - time=clock(); - client_update(net, address); - printf("Updated: %lf seconds\n", sec(clock()-time)); + time=clock(); + client_update(net, address); + printf("Updated: %lf seconds\n", sec(clock()-time)); - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - normalize_data_rows(train); - load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer); - printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + normalize_data_rows(train); + load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); - float loss = train_network(net, train); - avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); - free_data(train); - } -} -*/ + float loss = train_network(net, train); + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); + free_data(train); + } + } + */ void convert(char *cfgfile, char *outfile, char *weightfile) { @@ -239,6 +263,111 @@ void convert(char *cfgfile, char *outfile, char *weightfile) save_network(net, outfile); } +void train_captcha(char *cfgfile, char *weightfile) +{ + float avg_loss = -1; + srand(time(0)); + char *base = basename(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = net.seen/imgs; + list *plist = get_paths("/data/captcha/train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_captcha(paths, imgs, plist->size, 10, 60, 200); + translate_data_rows(train, -128); + scale_data_rows(train, 1./128); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net.seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); + free_data(train); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + + +void validate_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basename(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + int imgs = 1000; + int numchars = 37; + list *plist = get_paths("/data/captcha/valid.list"); + char **paths = (char **)list_to_array(plist); + data valid = load_data_captcha(paths, imgs, 0, 10, 60, 200); + translate_data_rows(valid, -128); + scale_data_rows(valid, 1./128); + matrix pred = network_predict_data(net, valid); + int i, k; + int correct = 0; + int total = 0; + int accuracy = 0; + for(i = 0; i < imgs; ++i){ + int allcorrect = 1; + for(k = 0; k < 10; ++k){ + char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); + char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); + if (truth != prediction) allcorrect=0; + if (truth != '.' && truth == prediction) ++correct; + if (truth != '.' || truth != prediction) ++total; + } + accuracy += allcorrect; + } + printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); + free_data(valid); +} + +void test_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basename(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + clock_t time; + char filename[256]; + while(1){ + printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + time = clock(); + image im = load_image_color(filename, 60, 200); + translate_image(im, -128); + scale_image(im, 1/128.); + float *X = im.data; + time=clock(); + float *predictions = network_predict(net, X); + printf("Predicted in %f\n", sec(clock() - time)); + print_letters(predictions, 10); + free_image(im); + } +} + void train_imagenet(char *cfgfile, char *weightfile) { float avg_loss = -1; @@ -333,6 +462,7 @@ void test_detection(char *cfgfile, char *weightfile) if(weightfile){ load_weights(&net, weightfile); } + int im_size = 224; set_batch_network(&net, 1); srand(2222222); clock_t time; @@ -340,7 +470,7 @@ void test_detection(char *cfgfile, char *weightfile) while(1){ fgets(filename, 256, stdin); strtok(filename, "\n"); - image im = load_image_color(filename, 224, 224); + image im = load_image_color(filename, im_size, im_size); translate_image(im, -128); scale_image(im, 1/128.); printf("%d %d %d\n", im.h, im.w, im.c); @@ -814,6 +944,9 @@ int main(int argc, char **argv) else if(0==strcmp(argv[1], "nist")) train_nist(argv[2]); else if(0==strcmp(argv[1], "ctest")) test_cifar10(argv[2]); else if(0==strcmp(argv[1], "train")) train_imagenet(argv[2], (argc > 3)? argv[3] : 0); + else if(0==strcmp(argv[1], "captcha")) train_captcha(argv[2], (argc > 3)? argv[3] : 0); + else if(0==strcmp(argv[1], "tcaptcha")) test_captcha(argv[2], (argc > 3)? argv[3] : 0); + else if(0==strcmp(argv[1], "vcaptcha")) validate_captcha(argv[2], (argc > 3)? argv[3] : 0); else if(0==strcmp(argv[1], "testseg")) test_voc_segment(argv[2], (argc > 3)? argv[3] : 0); //else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]); else if(0==strcmp(argv[1], "detect")) test_detection(argv[2], (argc > 3)? argv[3] : 0); diff --git a/src/data.c b/src/data.c index fd6b7222..a6b6db36 100644 --- a/src/data.c +++ b/src/data.c @@ -17,6 +17,7 @@ struct load_args{ int nh; int nw; int jitter; + int classes; data *d; }; @@ -33,53 +34,16 @@ list *get_paths(char *filename) return lines; } -void fill_truth_detection(char *path, float *truth, int height, int width, int num_height, int num_width, int dy, int dx, int jitter) -{ - int box_height = height/num_height; - int box_width = width/num_width; - char *labelpath = find_replace(path, "imgs", "det/train"); - labelpath = find_replace(labelpath, ".JPEG", ".txt"); - FILE *file = fopen(labelpath, "r"); - if(!file) file_error(labelpath); - float x, y, h, w; - while(fscanf(file, "%f %f %f %f", &x, &y, &w, &h) == 4){ - x *= width + jitter; - y *= height + jitter; - x -= dx; - y -= dy; - int i = x/box_width; - int j = y/box_height; - - if(i < 0) i = 0; - if(i >= num_width) i = num_width-1; - if(j < 0) j = 0; - if(j >= num_height) j = num_height-1; - - float dw = (x - i*box_width)/box_width; - float dh = (y - j*box_height)/box_height; - //printf("%d %d %f %f\n", i, j, dh, dw); - int index = (i+j*num_width)*5; - truth[index++] = 1; - truth[index++] = dh; - truth[index++] = dw; - truth[index++] = h*(height+jitter)/height; - truth[index++] = w*(width+jitter)/width; - } - fclose(file); -} - -void fill_truth(char *path, char **labels, int k, float *truth) +char **get_random_paths(char **paths, int n, int m) { + char **random_paths = calloc(n, sizeof(char*)); int i; - memset(truth, 0, k*sizeof(float)); - int count = 0; - for(i = 0; i < k; ++i){ - if(strstr(path, labels[i])){ - truth[i] = 1; - ++count; - } + for(i = 0; i < n; ++i){ + int index = rand()%m; + random_paths[i] = paths[index]; + if(i == 0) printf("%s\n", paths[index]); } - if(count != 1) printf("%d, %s\n", count, path); + return random_paths; } matrix load_image_paths(char **paths, int n, int h, int w) @@ -98,16 +62,100 @@ matrix load_image_paths(char **paths, int n, int h, int w) return X; } -char **get_random_paths(char **paths, int n, int m) +void fill_truth_detection(char *path, float *truth, int classes, int height, int width, int num_height, int num_width, int dy, int dx, int jitter, int flip) +{ + int box_height = height/num_height; + int box_width = width/num_width; + char *labelpath = find_replace(path, "VOC2012/JPEGImages", "labels"); + labelpath = find_replace(labelpath, ".jpg", ".txt"); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + float x, y, h, w; + int id; + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + if(flip) x = 1-x; + x *= width + jitter; + y *= height + jitter; + x -= dx; + y -= dy; + int i = x/box_width; + int j = y/box_height; + + if(i < 0) i = 0; + if(i >= num_width) i = num_width-1; + if(j < 0) j = 0; + if(j >= num_height) j = num_height-1; + + float dw = (x - i*box_width)/box_width; + float dh = (y - j*box_height)/box_height; + //printf("%d %d %d %f %f\n", id, i, j, dh, dw); + int index = (i+j*num_width)*(4+classes+1); + truth[index++] = 1; + truth[index+id] = 1; + index += classes; + truth[index++] = dh; + truth[index++] = dw; + truth[index++] = h*(height+jitter)/height; + truth[index++] = w*(width+jitter)/width; + } + fclose(file); +} + +#define NUMCHARS 37 + +void print_letters(float *pred, int n) { - char **random_paths = calloc(n, sizeof(char*)); int i; for(i = 0; i < n; ++i){ - int index = rand()%m; - random_paths[i] = paths[index]; - if(i == 0) printf("%s\n", paths[index]); + int index = max_index(pred+i*NUMCHARS, NUMCHARS); + printf("%c", int_to_alphanum(index)); } - return random_paths; + printf("\n"); +} + +void fill_truth_captcha(char *path, int n, float *truth) +{ + char *begin = strrchr(path, '/'); + ++begin; + int i; + for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + int index = alphanum_to_int(begin[i]); + if(index > 35) printf("Bad %c\n", begin[i]); + truth[i*NUMCHARS+index] = 1; + } + for(;i < n; ++i){ + truth[i*NUMCHARS + NUMCHARS-1] = 1; + } +} + +data load_data_captcha(char **paths, int n, int m, int k, int h, int w) +{ + if(m) paths = get_random_paths(paths, n, m); + data d; + d.shallow = 0; + d.X = load_image_paths(paths, n, h, w); + d.y = make_matrix(n, k*NUMCHARS); + int i; + for(i = 0; i < n; ++i){ + fill_truth_captcha(paths[i], k, d.y.vals[i]); + } + if(m) free(paths); + return d; +} + + +void fill_truth(char *path, char **labels, int k, float *truth) +{ + int i; + memset(truth, 0, k*sizeof(float)); + int count = 0; + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + ++count; + } + } + if(count != 1) printf("%d, %s\n", count, path); } matrix load_labels_paths(char **paths, int n, char **labels, int k) @@ -120,17 +168,6 @@ matrix load_labels_paths(char **paths, int n, char **labels, int k) return y; } -matrix load_labels_detection(char **paths, int n, int height, int width, int num_height, int num_width) -{ - int k = num_height*num_width*5; - matrix y = make_matrix(n, k); - int i; - for(i = 0; i < n; ++i){ - fill_truth_detection(paths[i], y.vals[i], height, width, num_height, num_width, 0, 0, 0); - } - return y; -} - data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w) { list *plist = get_paths(filename); @@ -165,20 +202,22 @@ void free_data(data d) } } -data load_data_detection_jitter_random(int n, char **paths, int m, int h, int w, int nh, int nw, int jitter) +data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter) { char **random_paths = get_random_paths(paths, n, m); int i; data d; d.shallow = 0; d.X = load_image_paths(random_paths, n, h, w); - int k = nh*nw*5; + int k = nh*nw*(4+classes+1); d.y = make_matrix(n, k); for(i = 0; i < n; ++i){ int dx = rand()%jitter; int dy = rand()%jitter; - fill_truth_detection(random_paths[i], d.y.vals[i], h-jitter, w-jitter, nh, nw, dy, dx, jitter); + int flip = rand()%2; + fill_truth_detection(random_paths[i], d.y.vals[i], classes, h-jitter, w-jitter, nh, nw, dy, dx, jitter, flip); image a = float_to_image(h, w, 3, d.X.vals[i]); + if(flip) flip_image(a); jitter_image(a,h-jitter,w-jitter,dy,dx); } d.X.cols = (h-jitter)*(w-jitter)*3; @@ -189,14 +228,14 @@ data load_data_detection_jitter_random(int n, char **paths, int m, int h, int w, void *load_detection_thread(void *ptr) { struct load_args a = *(struct load_args*)ptr; - *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.h, a.w, a.nh, a.nw, a.jitter); + *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.h, a.w, a.nh, a.nw, a.jitter); translate_data_rows(*a.d, -128); scale_data_rows(*a.d, 1./128); free(ptr); return 0; } -pthread_t load_data_detection_thread(int n, char **paths, int m, int h, int w, int nh, int nw, int jitter, data *d) +pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d) { pthread_t thread; struct load_args *args = calloc(1, sizeof(struct load_args)); @@ -207,6 +246,7 @@ pthread_t load_data_detection_thread(int n, char **paths, int m, int h, int w, i args->w = w; args->nh = nh; args->nw = nw; + args->classes = classes; args->jitter = jitter; args->d = d; if(pthread_create(&thread, 0, load_detection_thread, args)) { @@ -215,17 +255,6 @@ pthread_t load_data_detection_thread(int n, char **paths, int m, int h, int w, i return thread; } -data load_data_detection_random(int n, char **paths, int m, int h, int w, int nh, int nw) -{ - char **random_paths = get_random_paths(paths, n, m); - data d; - d.shallow = 0; - d.X = load_image_paths(random_paths, n, h, w); - d.y = load_labels_detection(random_paths, n, h, w, nh, nw); - free(random_paths); - return d; -} - data load_data(char **paths, int n, int m, char **labels, int k, int h, int w) { if(m) paths = get_random_paths(paths, n, m); diff --git a/src/data.h b/src/data.h index 13b62d8d..6a08c885 100644 --- a/src/data.h +++ b/src/data.h @@ -14,12 +14,13 @@ typedef struct{ void free_data(data d); +void print_letters(float *pred, int n); +data load_data_captcha(char **paths, int n, int m, int k, int h, int w); data load_data(char **paths, int n, int m, char **labels, int k, int h, int w); pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d); -pthread_t load_data_detection_thread(int n, char **paths, int m, int h, int w, int nh, int nw, int jitter, data *d); -data load_data_detection_jitter_random(int n, char **paths, int m, int h, int w, int nh, int nw, int jitter); -data load_data_detection_random(int n, char **paths, int m, int h, int w, int nh, int nw); +pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d); +data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter); data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w); data load_cifar10_data(char *filename); diff --git a/src/image.c b/src/image.c index a686a3e0..53cf281c 100644 --- a/src/image.c +++ b/src/image.c @@ -4,9 +4,23 @@ int windows = 0; -void draw_box(image a, int x1, int y1, int x2, int y2) +float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; + +float get_color(int c, int x, int max) { - int i, c; + float ratio = ((float)x/max)*5; + int i = floor(ratio); + int j = ceil(ratio); + ratio -= i; + float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; + printf("%f\n", r); + return r; +} + +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) +{ + normalize_image(a); + int i; if(x1 < 0) x1 = 0; if(x1 >= a.w) x1 = a.w-1; if(x2 < 0) x2 = 0; @@ -17,17 +31,25 @@ void draw_box(image a, int x1, int y1, int x2, int y2) if(y2 < 0) y2 = 0; if(y2 >= a.h) y2 = a.h-1; - for(c = 0; c < a.c; ++c){ - for(i = x1; i < x2; ++i){ - a.data[i + y1*a.w + c*a.w*a.h] = (c==0)?1:-1; - a.data[i + y2*a.w + c*a.w*a.h] = (c==0)?1:-1; - } + for(i = x1; i < x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = b; + a.data[i + y2*a.w + 0*a.w*a.h] = b; + + a.data[i + y1*a.w + 1*a.w*a.h] = g; + a.data[i + y2*a.w + 1*a.w*a.h] = g; + + a.data[i + y1*a.w + 2*a.w*a.h] = r; + a.data[i + y2*a.w + 2*a.w*a.h] = r; } - for(c = 0; c < a.c; ++c){ - for(i = y1; i < y2; ++i){ - a.data[x1 + i*a.w + c*a.w*a.h] = (c==0)?1:-1; - a.data[x2 + i*a.w + c*a.w*a.h] = (c==0)?1:-1; - } + for(i = y1; i < y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = b; + a.data[x2 + i*a.w + 0*a.w*a.h] = b; + + a.data[x1 + i*a.w + 1*a.w*a.h] = g; + a.data[x2 + i*a.w + 1*a.w*a.h] = g; + + a.data[x1 + i*a.w + 2*a.w*a.h] = r; + a.data[x2 + i*a.w + 2*a.w*a.h] = r; } } @@ -46,6 +68,22 @@ void jitter_image(image a, int h, int w, int dh, int dw) } } +void flip_image(image a) +{ + int i,j,k; + for(k = 0; k < a.c; ++k){ + for(i = 0; i < a.h; ++i){ + for(j = 0; j < a.w/2; ++j){ + int index = j + a.w*(i + a.h*(k)); + int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); + float swap = a.data[flip]; + a.data[flip] = a.data[index]; + a.data[index] = swap; + } + } + } +} + image image_distance(image a, image b) { int i,j; diff --git a/src/image.h b/src/image.h index 219798da..93b9e7e2 100644 --- a/src/image.h +++ b/src/image.h @@ -11,8 +11,10 @@ typedef struct { float *data; } image; +float get_color(int c, int x, int max); void jitter_image(image a, int h, int w, int dh, int dw); -void draw_box(image a, int x1, int y1, int x2, int y2); +void flip_image(image a); +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); image image_distance(image a, image b); void scale_image(image m, float s); void translate_image(image m, float s); diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 1f3f2e0b..b83d0566 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -21,6 +21,7 @@ extern "C" { extern "C" float * get_network_output_gpu_layer(network net, int i); extern "C" float * get_network_delta_gpu_layer(network net, int i); +float *get_network_output_gpu(network net); void forward_network_gpu(network net, float * input, float * truth, int train) { @@ -219,6 +220,10 @@ float train_network_datum_gpu(network net, float *x, float *y) //time = clock(); update_network_gpu(net); float error = get_network_cost(net); + + //print_letters(y, 50); + //float *out = get_network_output_gpu(net); + //print_letters(out, 50); //printf("updt %f\n", sec(clock() - time)); //time = clock(); return error; diff --git a/src/parser.c b/src/parser.c index 3f94c809..850cc388 100644 --- a/src/parser.c +++ b/src/parser.c @@ -191,6 +191,7 @@ connected_layer *parse_connected(list *options, network *net, int count) softmax_layer *parse_softmax(list *options, network *net, int count) { int input; + int groups = option_find_int(options, "groups",1); if(count == 0){ input = option_find_int(options, "input",1); net->batch = option_find_int(options, "batch",1); @@ -198,7 +199,7 @@ softmax_layer *parse_softmax(list *options, network *net, int count) }else{ input = get_network_output_size_layer(*net, count-1); } - softmax_layer *layer = make_softmax_layer(net->batch, input); + softmax_layer *layer = make_softmax_layer(net->batch, groups, input); option_unused(options); return layer; } diff --git a/src/softmax_layer.c b/src/softmax_layer.c index aa5ab069..a200ae54 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -5,16 +5,18 @@ #include #include #include +#include -softmax_layer *make_softmax_layer(int batch, int inputs) +softmax_layer *make_softmax_layer(int batch, int groups, int inputs) { + assert(inputs%groups == 0); fprintf(stderr, "Softmax Layer: %d inputs\n", inputs); softmax_layer *layer = calloc(1, sizeof(softmax_layer)); layer->batch = batch; + layer->groups = groups; layer->inputs = inputs; layer->output = calloc(inputs*batch, sizeof(float)); layer->delta = calloc(inputs*batch, sizeof(float)); - layer->jacobian = calloc(inputs*inputs*batch, sizeof(float)); #ifdef GPU layer->output_gpu = cuda_make_array(layer->output, inputs*batch); layer->delta_gpu = cuda_make_array(layer->delta, inputs*batch); @@ -22,23 +24,31 @@ softmax_layer *make_softmax_layer(int batch, int inputs) return layer; } +void softmax_array(float *input, int n, float *output) +{ + int i; + float sum = 0; + float largest = -FLT_MAX; + for(i = 0; i < n; ++i){ + if(input[i] > largest) largest = input[i]; + } + for(i = 0; i < n; ++i){ + sum += exp(input[i]-largest); + } + if(sum) sum = largest+log(sum); + else sum = largest-100; + for(i = 0; i < n; ++i){ + output[i] = exp(input[i]-sum); + } +} + void forward_softmax_layer(const softmax_layer layer, float *input) { - int i,b; - for(b = 0; b < layer.batch; ++b){ - float sum = 0; - float largest = -FLT_MAX; - for(i = 0; i < layer.inputs; ++i){ - if(input[i+b*layer.inputs] > largest) largest = input[i+b*layer.inputs]; - } - for(i = 0; i < layer.inputs; ++i){ - sum += exp(input[i+b*layer.inputs]-largest); - } - if(sum) sum = largest+log(sum); - else sum = largest-100; - for(i = 0; i < layer.inputs; ++i){ - layer.output[i+b*layer.inputs] = exp(input[i+b*layer.inputs]-sum); - } + int b; + int inputs = layer.inputs / layer.groups; + int batch = layer.batch * layer.groups; + for(b = 0; b < batch; ++b){ + softmax_array(input+b*inputs, inputs, layer.output+b*inputs); } } diff --git a/src/softmax_layer.h b/src/softmax_layer.h index 0cc9d538..1c1cdae8 100644 --- a/src/softmax_layer.h +++ b/src/softmax_layer.h @@ -4,16 +4,16 @@ typedef struct { int inputs; int batch; + int groups; float *delta; float *output; - float *jacobian; #ifdef GPU float * delta_gpu; float * output_gpu; #endif } softmax_layer; -softmax_layer *make_softmax_layer(int batch, int inputs); +softmax_layer *make_softmax_layer(int batch, int groups, int inputs); void forward_softmax_layer(const softmax_layer layer, float *input); void backward_softmax_layer(const softmax_layer layer, float *delta); diff --git a/src/softmax_layer_kernels.cu b/src/softmax_layer_kernels.cu index 61dc6071..c0e8bc38 100644 --- a/src/softmax_layer_kernels.cu +++ b/src/softmax_layer_kernels.cu @@ -34,7 +34,9 @@ extern "C" void pull_softmax_layer_output(const softmax_layer layer) extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, float *input) { - forward_softmax_layer_kernel<<>>(layer.inputs, layer.batch, input, layer.output_gpu); + int inputs = layer.inputs / layer.groups; + int batch = layer.batch * layer.groups; + forward_softmax_layer_kernel<<>>(inputs, batch, input, layer.output_gpu); check_error(cudaPeekAtLastError()); /* diff --git a/src/utils.c b/src/utils.c index bf02ff3a..1db81013 100644 --- a/src/utils.c +++ b/src/utils.c @@ -8,6 +8,17 @@ #include "utils.h" + +int alphanum_to_int(char c) +{ + return (c < 58) ? c - 48 : c-87; +} +char int_to_alphanum(int i) +{ + if (i == 36) return '.'; + return (i < 10) ? i + 48 : i + 87; +} + void pm(int M, int N, float *A) { int i,j; diff --git a/src/utils.h b/src/utils.h index e233da85..7ae8a8d0 100644 --- a/src/utils.h +++ b/src/utils.h @@ -4,6 +4,8 @@ #include #include "list.h" +int alphanum_to_int(char c); +char int_to_alphanum(int i); void read_all(int fd, char *buffer, size_t bytes); void write_all(int fd, char *buffer, size_t bytes); char *find_replace(char *str, char *orig, char *rep);