mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
better multigpu
This commit is contained in:
parent
5c067dc447
commit
73f7aacf35
@ -365,7 +365,7 @@ __global__ void const_kernel(int N, float ALPHA, float *X, int INCX)
|
|||||||
__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX)
|
__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX)
|
||||||
{
|
{
|
||||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||||
if(i < N) X[i*INCX] = min(ALPHA, max(-ALPHA, X[i*INCX]));
|
if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX]));
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
|
__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
|
||||||
|
@ -28,7 +28,6 @@ void fix_data_captcha(data d, int mask)
|
|||||||
|
|
||||||
void train_captcha(char *cfgfile, char *weightfile)
|
void train_captcha(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
|
@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
void train_cifar(char *cfgfile, char *weightfile)
|
void train_cifar(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
@ -59,7 +58,6 @@ void train_cifar(char *cfgfile, char *weightfile)
|
|||||||
|
|
||||||
void train_cifar_distill(char *cfgfile, char *weightfile)
|
void train_cifar_distill(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
|
204
src/classifier.c
204
src/classifier.c
@ -55,10 +55,8 @@ float *get_regression_values(char **labels, int n)
|
|||||||
void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
|
void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
int nthreads = 8;
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
@ -68,17 +66,20 @@ void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int
|
|||||||
for(i = 0; i < ngpus; ++i){
|
for(i = 0; i < ngpus; ++i){
|
||||||
cuda_set_device(gpus[i]);
|
cuda_set_device(gpus[i]);
|
||||||
nets[i] = parse_network_cfg(cfgfile);
|
nets[i] = parse_network_cfg(cfgfile);
|
||||||
if(weightfile){
|
|
||||||
load_weights(&(nets[i]), weightfile);
|
|
||||||
}
|
|
||||||
if(clear) *nets[i].seen = 0;
|
if(clear) *nets[i].seen = 0;
|
||||||
|
if(weightfile){
|
||||||
|
load_weights(&nets[i], weightfile);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
network net = nets[0];
|
network net = nets[0];
|
||||||
|
for(i = 0; i < ngpus; ++i){
|
||||||
|
*nets[i].seen = *net.seen;
|
||||||
|
nets[i].learning_rate *= ngpus;
|
||||||
|
}
|
||||||
|
|
||||||
|
int imgs = net.batch * net.subdivisions * ngpus;
|
||||||
|
|
||||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||||
int imgs = net.batch*ngpus/nthreads;
|
|
||||||
assert(net.batch*ngpus % nthreads == 0);
|
|
||||||
|
|
||||||
list *options = read_data_cfg(datacfg);
|
list *options = read_data_cfg(datacfg);
|
||||||
|
|
||||||
char *backup_directory = option_find_str(options, "backup", "/backup/");
|
char *backup_directory = option_find_str(options, "backup", "/backup/");
|
||||||
@ -93,13 +94,10 @@ void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int
|
|||||||
int N = plist->size;
|
int N = plist->size;
|
||||||
clock_t time;
|
clock_t time;
|
||||||
|
|
||||||
pthread_t *load_threads = calloc(nthreads, sizeof(pthread_t));
|
|
||||||
data *trains = calloc(nthreads, sizeof(data));
|
|
||||||
data *buffers = calloc(nthreads, sizeof(data));
|
|
||||||
|
|
||||||
load_args args = {0};
|
load_args args = {0};
|
||||||
args.w = net.w;
|
args.w = net.w;
|
||||||
args.h = net.h;
|
args.h = net.h;
|
||||||
|
args.threads = 16;
|
||||||
|
|
||||||
args.min = net.min_crop;
|
args.min = net.min_crop;
|
||||||
args.max = net.max_crop;
|
args.max = net.max_crop;
|
||||||
@ -117,36 +115,28 @@ void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int
|
|||||||
args.labels = labels;
|
args.labels = labels;
|
||||||
args.type = CLASSIFICATION_DATA;
|
args.type = CLASSIFICATION_DATA;
|
||||||
|
|
||||||
for(i = 0; i < nthreads; ++i){
|
data train;
|
||||||
args.d = buffers + i;
|
data buffer;
|
||||||
load_threads[i] = load_data_in_thread(args);
|
pthread_t load_thread;
|
||||||
}
|
args.d = &buffer;
|
||||||
|
load_thread = load_data(args);
|
||||||
|
|
||||||
int epoch = (*net.seen)/N;
|
int epoch = (*net.seen)/N;
|
||||||
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
||||||
time=clock();
|
time=clock();
|
||||||
for(i = 0; i < nthreads; ++i){
|
|
||||||
pthread_join(load_threads[i], 0);
|
|
||||||
trains[i] = buffers[i];
|
|
||||||
}
|
|
||||||
data train = concat_datas(trains, nthreads);
|
|
||||||
|
|
||||||
for(i = 0; i < nthreads; ++i){
|
pthread_join(load_thread, 0);
|
||||||
args.d = buffers + i;
|
train = buffer;
|
||||||
load_threads[i] = load_data_in_thread(args);
|
load_thread = load_data(args);
|
||||||
}
|
|
||||||
|
|
||||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||||
time=clock();
|
time=clock();
|
||||||
|
|
||||||
float loss = train_networks(nets, ngpus, train);
|
float loss = train_networks(nets, ngpus, train, 4);
|
||||||
if(avg_loss == -1) avg_loss = loss;
|
if(avg_loss == -1) avg_loss = loss;
|
||||||
avg_loss = avg_loss*.9 + loss*.1;
|
avg_loss = avg_loss*.9 + loss*.1;
|
||||||
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
||||||
free_data(train);
|
free_data(train);
|
||||||
for(i = 0; i < nthreads; ++i){
|
|
||||||
free_data(trains[i]);
|
|
||||||
}
|
|
||||||
if(*net.seen/N > epoch){
|
if(*net.seen/N > epoch){
|
||||||
epoch = *net.seen/N;
|
epoch = *net.seen/N;
|
||||||
char buff[256];
|
char buff[256];
|
||||||
@ -163,14 +153,6 @@ void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int
|
|||||||
sprintf(buff, "%s/%s.weights", backup_directory, base);
|
sprintf(buff, "%s/%s.weights", backup_directory, base);
|
||||||
save_weights(net, buff);
|
save_weights(net, buff);
|
||||||
|
|
||||||
for(i = 0; i < nthreads; ++i){
|
|
||||||
pthread_join(load_threads[i], 0);
|
|
||||||
free_data(buffers[i]);
|
|
||||||
}
|
|
||||||
free(buffers);
|
|
||||||
free(trains);
|
|
||||||
free(load_threads);
|
|
||||||
|
|
||||||
free_network(net);
|
free_network(net);
|
||||||
free_ptrs((void**)labels, classes);
|
free_ptrs((void**)labels, classes);
|
||||||
free_ptrs((void**)paths, plist->size);
|
free_ptrs((void**)paths, plist->size);
|
||||||
@ -182,10 +164,6 @@ void train_classifier_multi(char *datacfg, char *cfgfile, char *weightfile, int
|
|||||||
|
|
||||||
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
||||||
{
|
{
|
||||||
int nthreads = 8;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
@ -195,10 +173,10 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||||||
load_weights(&net, weightfile);
|
load_weights(&net, weightfile);
|
||||||
}
|
}
|
||||||
if(clear) *net.seen = 0;
|
if(clear) *net.seen = 0;
|
||||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
|
||||||
int imgs = net.batch*net.subdivisions/nthreads;
|
|
||||||
assert(net.batch*net.subdivisions % nthreads == 0);
|
|
||||||
|
|
||||||
|
int imgs = net.batch * net.subdivisions;
|
||||||
|
|
||||||
|
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||||
list *options = read_data_cfg(datacfg);
|
list *options = read_data_cfg(datacfg);
|
||||||
|
|
||||||
char *backup_directory = option_find_str(options, "backup", "/backup/");
|
char *backup_directory = option_find_str(options, "backup", "/backup/");
|
||||||
@ -213,13 +191,10 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||||||
int N = plist->size;
|
int N = plist->size;
|
||||||
clock_t time;
|
clock_t time;
|
||||||
|
|
||||||
pthread_t *load_threads = calloc(nthreads, sizeof(pthread_t));
|
|
||||||
data *trains = calloc(nthreads, sizeof(data));
|
|
||||||
data *buffers = calloc(nthreads, sizeof(data));
|
|
||||||
|
|
||||||
load_args args = {0};
|
load_args args = {0};
|
||||||
args.w = net.w;
|
args.w = net.w;
|
||||||
args.h = net.h;
|
args.h = net.h;
|
||||||
|
args.threads = 8;
|
||||||
|
|
||||||
args.min = net.min_crop;
|
args.min = net.min_crop;
|
||||||
args.max = net.max_crop;
|
args.max = net.max_crop;
|
||||||
@ -237,24 +212,19 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||||||
args.labels = labels;
|
args.labels = labels;
|
||||||
args.type = CLASSIFICATION_DATA;
|
args.type = CLASSIFICATION_DATA;
|
||||||
|
|
||||||
for(i = 0; i < nthreads; ++i){
|
data train;
|
||||||
args.d = buffers + i;
|
data buffer;
|
||||||
load_threads[i] = load_data_in_thread(args);
|
pthread_t load_thread;
|
||||||
}
|
args.d = &buffer;
|
||||||
|
load_thread = load_data(args);
|
||||||
|
|
||||||
int epoch = (*net.seen)/N;
|
int epoch = (*net.seen)/N;
|
||||||
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
||||||
time=clock();
|
time=clock();
|
||||||
for(i = 0; i < nthreads; ++i){
|
|
||||||
pthread_join(load_threads[i], 0);
|
|
||||||
trains[i] = buffers[i];
|
|
||||||
}
|
|
||||||
data train = concat_datas(trains, nthreads);
|
|
||||||
|
|
||||||
for(i = 0; i < nthreads; ++i){
|
pthread_join(load_thread, 0);
|
||||||
args.d = buffers + i;
|
train = buffer;
|
||||||
load_threads[i] = load_data_in_thread(args);
|
load_thread = load_data(args);
|
||||||
}
|
|
||||||
|
|
||||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||||
time=clock();
|
time=clock();
|
||||||
@ -271,13 +241,11 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
float loss = train_network(net, train);
|
float loss = train_network(net, train);
|
||||||
|
free_data(train);
|
||||||
|
|
||||||
if(avg_loss == -1) avg_loss = loss;
|
if(avg_loss == -1) avg_loss = loss;
|
||||||
avg_loss = avg_loss*.9 + loss*.1;
|
avg_loss = avg_loss*.9 + loss*.1;
|
||||||
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
||||||
free_data(train);
|
|
||||||
for(i = 0; i < nthreads; ++i){
|
|
||||||
free_data(trains[i]);
|
|
||||||
}
|
|
||||||
if(*net.seen/N > epoch){
|
if(*net.seen/N > epoch){
|
||||||
epoch = *net.seen/N;
|
epoch = *net.seen/N;
|
||||||
char buff[256];
|
char buff[256];
|
||||||
@ -294,14 +262,6 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||||||
sprintf(buff, "%s/%s.weights", backup_directory, base);
|
sprintf(buff, "%s/%s.weights", backup_directory, base);
|
||||||
save_weights(net, buff);
|
save_weights(net, buff);
|
||||||
|
|
||||||
for(i = 0; i < nthreads; ++i){
|
|
||||||
pthread_join(load_threads[i], 0);
|
|
||||||
free_data(buffers[i]);
|
|
||||||
}
|
|
||||||
free(buffers);
|
|
||||||
free(trains);
|
|
||||||
free(load_threads);
|
|
||||||
|
|
||||||
free_network(net);
|
free_network(net);
|
||||||
free_ptrs((void**)labels, classes);
|
free_ptrs((void**)labels, classes);
|
||||||
free_ptrs((void**)paths, plist->size);
|
free_ptrs((void**)paths, plist->size);
|
||||||
@ -934,7 +894,19 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
|
|||||||
int w = x2 - x1 - 2*border;
|
int w = x2 - x1 - 2*border;
|
||||||
|
|
||||||
float *predictions = network_predict(net, in_s.data);
|
float *predictions = network_predict(net, in_s.data);
|
||||||
float curr_threat = predictions[0] * 0 + predictions[1] * .6 + predictions[2];
|
float curr_threat = 0;
|
||||||
|
if(1){
|
||||||
|
curr_threat = predictions[0] * 0 +
|
||||||
|
predictions[1] * .6 +
|
||||||
|
predictions[2];
|
||||||
|
} else {
|
||||||
|
curr_threat = predictions[218] +
|
||||||
|
predictions[539] +
|
||||||
|
predictions[540] +
|
||||||
|
predictions[368] +
|
||||||
|
predictions[369] +
|
||||||
|
predictions[370];
|
||||||
|
}
|
||||||
threat = roll * curr_threat + (1-roll) * threat;
|
threat = roll * curr_threat + (1-roll) * threat;
|
||||||
|
|
||||||
draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0);
|
draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0);
|
||||||
@ -970,7 +942,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
|
|||||||
top_predictions(net, top, indexes);
|
top_predictions(net, top, indexes);
|
||||||
char buff[256];
|
char buff[256];
|
||||||
sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count);
|
sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count);
|
||||||
save_image(out, buff);
|
//save_image(out, buff);
|
||||||
|
|
||||||
printf("\033[2J");
|
printf("\033[2J");
|
||||||
printf("\033[1;1H");
|
printf("\033[1;1H");
|
||||||
@ -981,7 +953,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
|
|||||||
printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
|
printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(0){
|
if(1){
|
||||||
show_image(out, "Threat");
|
show_image(out, "Threat");
|
||||||
cvWaitKey(10);
|
cvWaitKey(10);
|
||||||
}
|
}
|
||||||
@ -997,6 +969,85 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * gun_classifier - run a classifier over a video stream and report whenever
 * any class from a fixed watch-list scores above a threshold.
 *
 * datacfg    data config; the "top" and "names" options are read from it.
 * cfgfile    network configuration handed to parse_network_cfg().
 * weightfile optional weights file; loaded only when non-NULL.
 * cam_index  camera index, used when filename is NULL.
 * filename   optional video file; takes precedence over the camera.
 *
 * For every frame the terminal is cleared, then either "Threat Detected!"
 * followed by the names of all watch-list classes above the threshold, or
 * "Scanning..." is printed. The body is compiled only when OPENCV is
 * defined; without it the function does nothing.
 */
void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
    /* Class indices treated as threats.
     * NOTE(review): these index directly into the prediction vector, so the
     * network must have at least 19698 outputs -- nothing here checks that;
     * confirm against the cfg this is meant to run with. */
    int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697};
    /* Element count kept as int so the loop comparisons below are not
     * signed-vs-unsigned. */
    int n_bad = sizeof(bad_cats)/sizeof(bad_cats[0]);

    printf("Classifier Demo\n");
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
    CvCapture * cap;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    int top = option_find_int(options, "top", 1);

    char *name_list = option_find_str(options, "names", 0);
    char **names = get_labels(name_list);

    int *indexes = calloc(top, sizeof(int));

    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL);
    cvResizeWindow("Threat Detection", 512, 512);
    float fps = 0;
    int i;

    while(1){
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        /* NOTE(review): presumably an image with no data means the stream
         * has ended (e.g. end of a video file) -- confirm against
         * get_image_from_stream. Stop instead of resizing/predicting on it. */
        if(!in.data) break;
        image in_s = resize_image(in, net.w, net.h);
        show_image(in, "Threat Detection");

        float *predictions = network_predict(net, in_s.data);
        top_predictions(net, top, indexes);

        /* ANSI escapes: clear the screen and move the cursor to 1;1. */
        printf("\033[2J");
        printf("\033[1;1H");

        /* First pass: is any watch-list class above the threshold? */
        int threat = 0;
        for(i = 0; i < n_bad; ++i){
            if(predictions[bad_cats[i]] > .01){
                threat = 1;
                break;
            }
        }
        if(threat){
            printf("Threat Detected!\n");
        } else {
            printf("Scanning...\n");
        }

        /* Second pass: list every watch-list class above the threshold. */
        for(i = 0; i < n_bad; ++i){
            int index = bad_cats[i];
            if(predictions[index] > .01){
                printf("%s\n", names[index]);
            }
        }

        free_image(in_s);
        free_image(in);

        cvWaitKey(10);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        /* Use the full elapsed time: tv_usec alone wraps every second and
         * may be zero, which made the per-frame rate wrong or infinite. */
        float elapsed = tval_result.tv_sec + tval_result.tv_usec/1000000.f;
        if(elapsed > 0){
            float curr = 1.f/elapsed;
            /* Exponential moving average of the frame rate (tracked only;
             * nothing in this function prints it). */
            fps = .9*fps + .1*curr;
        }
    }

    free(indexes);
#endif
}
|
||||||
|
|
||||||
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
|
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
|
||||||
{
|
{
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
@ -1102,6 +1153,7 @@ void run_classifier(int argc, char **argv)
|
|||||||
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, clear);
|
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, clear);
|
||||||
else if(0==strcmp(argv[2], "trainm")) train_classifier_multi(data, cfg, weights, gpus, ngpus, clear);
|
else if(0==strcmp(argv[2], "trainm")) train_classifier_multi(data, cfg, weights, gpus, ngpus, clear);
|
||||||
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
|
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
|
||||||
|
else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
|
||||||
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
|
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
|
||||||
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
|
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
|
||||||
else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
|
else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
|
||||||
|
@ -28,7 +28,6 @@ void train_coco(char *cfgfile, char *weightfile)
|
|||||||
//char *train_images = "data/bags.train.list";
|
//char *train_images = "data/bags.train.list";
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char *backup_directory = "/home/pjreddie/backup/";
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
data_seed = time(0);
|
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
|
|
||||||
void train_compare(char *cfgfile, char *weightfile)
|
void train_compare(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
|
@ -204,10 +204,12 @@ void statistics_connected_layer(layer l)
|
|||||||
if(l.batch_normalize){
|
if(l.batch_normalize){
|
||||||
printf("Scales ");
|
printf("Scales ");
|
||||||
print_statistics(l.scales, l.outputs);
|
print_statistics(l.scales, l.outputs);
|
||||||
|
/*
|
||||||
printf("Rolling Mean ");
|
printf("Rolling Mean ");
|
||||||
print_statistics(l.rolling_mean, l.outputs);
|
print_statistics(l.rolling_mean, l.outputs);
|
||||||
printf("Rolling Variance ");
|
printf("Rolling Variance ");
|
||||||
print_statistics(l.rolling_variance, l.outputs);
|
print_statistics(l.rolling_variance, l.outputs);
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
printf("Biases ");
|
printf("Biases ");
|
||||||
print_statistics(l.biases, l.outputs);
|
print_statistics(l.biases, l.outputs);
|
||||||
|
@ -237,8 +237,10 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float
|
|||||||
axpy_ongpu(layer.n, learning_rate/batch, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
axpy_ongpu(layer.n, learning_rate/batch, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
||||||
scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1);
|
scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1);
|
||||||
|
|
||||||
axpy_ongpu(layer.n, learning_rate/batch, layer.scale_updates_gpu, 1, layer.scales_gpu, 1);
|
if(layer.scales_gpu){
|
||||||
scal_ongpu(layer.n, momentum, layer.scale_updates_gpu, 1);
|
axpy_ongpu(layer.n, learning_rate/batch, layer.scale_updates_gpu, 1, layer.scales_gpu, 1);
|
||||||
|
scal_ongpu(layer.n, momentum, layer.scale_updates_gpu, 1);
|
||||||
|
}
|
||||||
|
|
||||||
axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
|
axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
|
||||||
axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
|
axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
|
||||||
|
@ -241,9 +241,6 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
|||||||
l.biases_gpu = cuda_make_array(l.biases, n);
|
l.biases_gpu = cuda_make_array(l.biases, n);
|
||||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
||||||
|
|
||||||
l.scales_gpu = cuda_make_array(l.scales, n);
|
|
||||||
l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
|
|
||||||
|
|
||||||
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
|
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
|
||||||
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||||
|
|
||||||
@ -265,6 +262,9 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
|||||||
l.mean_delta_gpu = cuda_make_array(l.mean, n);
|
l.mean_delta_gpu = cuda_make_array(l.mean, n);
|
||||||
l.variance_delta_gpu = cuda_make_array(l.variance, n);
|
l.variance_delta_gpu = cuda_make_array(l.variance, n);
|
||||||
|
|
||||||
|
l.scales_gpu = cuda_make_array(l.scales, n);
|
||||||
|
l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
|
||||||
|
|
||||||
l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||||
l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||||
}
|
}
|
||||||
@ -511,6 +511,11 @@ void update_convolutional_layer(convolutional_layer l, int batch, float learning
|
|||||||
axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
|
axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
|
||||||
scal_cpu(l.n, momentum, l.bias_updates, 1);
|
scal_cpu(l.n, momentum, l.bias_updates, 1);
|
||||||
|
|
||||||
|
if(l.scales){
|
||||||
|
axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
|
||||||
|
scal_cpu(l.n, momentum, l.scale_updates, 1);
|
||||||
|
}
|
||||||
|
|
||||||
axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
|
axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
|
||||||
axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
|
axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
|
||||||
scal_cpu(size, momentum, l.weight_updates, 1);
|
scal_cpu(size, momentum, l.weight_updates, 1);
|
||||||
|
96
src/data.c
96
src/data.c
@ -7,7 +7,6 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
unsigned int data_seed;
|
|
||||||
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
list *get_paths(char *filename)
|
list *get_paths(char *filename)
|
||||||
@ -23,13 +22,14 @@ list *get_paths(char *filename)
|
|||||||
return lines;
|
return lines;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
char **get_random_paths_indexes(char **paths, int n, int m, int *indexes)
|
char **get_random_paths_indexes(char **paths, int n, int m, int *indexes)
|
||||||
{
|
{
|
||||||
char **random_paths = calloc(n, sizeof(char*));
|
char **random_paths = calloc(n, sizeof(char*));
|
||||||
int i;
|
int i;
|
||||||
pthread_mutex_lock(&mutex);
|
pthread_mutex_lock(&mutex);
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
int index = rand_r(&data_seed)%m;
|
int index = rand()%m;
|
||||||
indexes[i] = index;
|
indexes[i] = index;
|
||||||
random_paths[i] = paths[index];
|
random_paths[i] = paths[index];
|
||||||
if(i == 0) printf("%s\n", paths[index]);
|
if(i == 0) printf("%s\n", paths[index]);
|
||||||
@ -37,6 +37,7 @@ char **get_random_paths_indexes(char **paths, int n, int m, int *indexes)
|
|||||||
pthread_mutex_unlock(&mutex);
|
pthread_mutex_unlock(&mutex);
|
||||||
return random_paths;
|
return random_paths;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
char **get_random_paths(char **paths, int n, int m)
|
char **get_random_paths(char **paths, int n, int m)
|
||||||
{
|
{
|
||||||
@ -44,7 +45,7 @@ char **get_random_paths(char **paths, int n, int m)
|
|||||||
int i;
|
int i;
|
||||||
pthread_mutex_lock(&mutex);
|
pthread_mutex_lock(&mutex);
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
int index = rand_r(&data_seed)%m;
|
int index = rand()%m;
|
||||||
random_paths[i] = paths[index];
|
random_paths[i] = paths[index];
|
||||||
if(i == 0) printf("%s\n", paths[index]);
|
if(i == 0) printf("%s\n", paths[index]);
|
||||||
}
|
}
|
||||||
@ -111,7 +112,7 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,
|
|||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
image im = load_image_color(paths[i], 0, 0);
|
image im = load_image_color(paths[i], 0, 0);
|
||||||
image crop = random_augment_image(im, angle, aspect, min, max, size);
|
image crop = random_augment_image(im, angle, aspect, min, max, size);
|
||||||
int flip = rand_r(&data_seed)%2;
|
int flip = rand()%2;
|
||||||
if (flip) flip_image(crop);
|
if (flip) flip_image(crop);
|
||||||
random_distort_image(crop, hue, saturation, exposure);
|
random_distort_image(crop, hue, saturation, exposure);
|
||||||
|
|
||||||
@ -159,7 +160,7 @@ void randomize_boxes(box_label *b, int n)
|
|||||||
int i;
|
int i;
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
box_label swap = b[i];
|
box_label swap = b[i];
|
||||||
int index = rand_r(&data_seed)%n;
|
int index = rand()%n;
|
||||||
b[i] = b[index];
|
b[i] = b[index];
|
||||||
b[index] = swap;
|
b[index] = swap;
|
||||||
}
|
}
|
||||||
@ -430,9 +431,6 @@ char **get_labels(char *filename)
|
|||||||
|
|
||||||
void free_data(data d)
|
void free_data(data d)
|
||||||
{
|
{
|
||||||
if(d.indexes){
|
|
||||||
free(d.indexes);
|
|
||||||
}
|
|
||||||
if(!d.shallow){
|
if(!d.shallow){
|
||||||
free_matrix(d.X);
|
free_matrix(d.X);
|
||||||
free_matrix(d.y);
|
free_matrix(d.y);
|
||||||
@ -476,7 +474,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
|
|||||||
float sx = (float)swidth / ow;
|
float sx = (float)swidth / ow;
|
||||||
float sy = (float)sheight / oh;
|
float sy = (float)sheight / oh;
|
||||||
|
|
||||||
int flip = rand_r(&data_seed)%2;
|
int flip = rand()%2;
|
||||||
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
|
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
|
||||||
|
|
||||||
float dx = ((float)pleft/ow)/sx;
|
float dx = ((float)pleft/ow)/sx;
|
||||||
@ -560,7 +558,7 @@ data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
|
|||||||
|
|
||||||
data load_data_swag(char **paths, int n, int classes, float jitter)
|
data load_data_swag(char **paths, int n, int classes, float jitter)
|
||||||
{
|
{
|
||||||
int index = rand_r(&data_seed)%n;
|
int index = rand()%n;
|
||||||
char *random_path = paths[index];
|
char *random_path = paths[index];
|
||||||
|
|
||||||
image orig = load_image_color(random_path, 0, 0);
|
image orig = load_image_color(random_path, 0, 0);
|
||||||
@ -593,7 +591,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
|
|||||||
float sx = (float)swidth / w;
|
float sx = (float)swidth / w;
|
||||||
float sy = (float)sheight / h;
|
float sy = (float)sheight / h;
|
||||||
|
|
||||||
int flip = rand_r(&data_seed)%2;
|
int flip = rand()%2;
|
||||||
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
|
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
|
||||||
|
|
||||||
float dx = ((float)pleft/w)/sx;
|
float dx = ((float)pleft/w)/sx;
|
||||||
@ -643,7 +641,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
|
|||||||
float sx = (float)swidth / ow;
|
float sx = (float)swidth / ow;
|
||||||
float sy = (float)sheight / oh;
|
float sy = (float)sheight / oh;
|
||||||
|
|
||||||
int flip = rand_r(&data_seed)%2;
|
int flip = rand()%2;
|
||||||
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
|
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
|
||||||
|
|
||||||
float dx = ((float)pleft/ow)/sx;
|
float dx = ((float)pleft/ow)/sx;
|
||||||
@ -666,26 +664,18 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
|
|||||||
|
|
||||||
void *load_thread(void *ptr)
|
void *load_thread(void *ptr)
|
||||||
{
|
{
|
||||||
|
//printf("Loading data: %d\n", rand());
|
||||||
#ifdef GPU
|
|
||||||
cudaError_t status = cudaSetDevice(gpu_index);
|
|
||||||
check_error(status);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//printf("Loading data: %d\n", rand_r(&data_seed));
|
|
||||||
load_args a = *(struct load_args*)ptr;
|
load_args a = *(struct load_args*)ptr;
|
||||||
if(a.exposure == 0) a.exposure = 1;
|
if(a.exposure == 0) a.exposure = 1;
|
||||||
if(a.saturation == 0) a.saturation = 1;
|
if(a.saturation == 0) a.saturation = 1;
|
||||||
if(a.aspect == 0) a.aspect = 1;
|
if(a.aspect == 0) a.aspect = 1;
|
||||||
|
|
||||||
if (a.type == OLD_CLASSIFICATION_DATA){
|
if (a.type == OLD_CLASSIFICATION_DATA){
|
||||||
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
*a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||||
} else if (a.type == CLASSIFICATION_DATA){
|
} else if (a.type == CLASSIFICATION_DATA){
|
||||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||||
} else if (a.type == SUPER_DATA){
|
} else if (a.type == SUPER_DATA){
|
||||||
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
|
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
|
||||||
} else if (a.type == STUDY_DATA){
|
|
||||||
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
|
||||||
} else if (a.type == WRITING_DATA){
|
} else if (a.type == WRITING_DATA){
|
||||||
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
|
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
|
||||||
} else if (a.type == REGION_DATA){
|
} else if (a.type == REGION_DATA){
|
||||||
@ -701,7 +691,6 @@ void *load_thread(void *ptr)
|
|||||||
*(a.resized) = resize_image(*(a.im), a.w, a.h);
|
*(a.resized) = resize_image(*(a.im), a.w, a.h);
|
||||||
} else if (a.type == TAG_DATA){
|
} else if (a.type == TAG_DATA){
|
||||||
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||||
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
|
||||||
}
|
}
|
||||||
free(ptr);
|
free(ptr);
|
||||||
return 0;
|
return 0;
|
||||||
@ -716,6 +705,43 @@ pthread_t load_data_in_thread(load_args args)
|
|||||||
return thread;
|
return thread;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void *load_threads(void *ptr)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
load_args args = *(load_args *)ptr;
|
||||||
|
data *out = args.d;
|
||||||
|
int total = args.n;
|
||||||
|
free(ptr);
|
||||||
|
data *buffers = calloc(args.threads, sizeof(data));
|
||||||
|
pthread_t *threads = calloc(args.threads, sizeof(pthread_t));
|
||||||
|
for(i = 0; i < args.threads; ++i){
|
||||||
|
args.d = buffers + i;
|
||||||
|
args.n = (i+1) * total/args.threads - i * total/args.threads;
|
||||||
|
threads[i] = load_data_in_thread(args);
|
||||||
|
}
|
||||||
|
for(i = 0; i < args.threads; ++i){
|
||||||
|
pthread_join(threads[i], 0);
|
||||||
|
}
|
||||||
|
*out = concat_datas(buffers, args.threads);
|
||||||
|
out->shallow = 0;
|
||||||
|
for(i = 0; i < args.threads; ++i){
|
||||||
|
buffers[i].shallow = 1;
|
||||||
|
free_data(buffers[i]);
|
||||||
|
}
|
||||||
|
free(buffers);
|
||||||
|
free(threads);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_t load_data(load_args args)
|
||||||
|
{
|
||||||
|
pthread_t thread;
|
||||||
|
struct load_args *ptr = calloc(1, sizeof(struct load_args));
|
||||||
|
*ptr = args;
|
||||||
|
if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed");
|
||||||
|
return thread;
|
||||||
|
}
|
||||||
|
|
||||||
data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h)
|
data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h)
|
||||||
{
|
{
|
||||||
if(m) paths = get_random_paths(paths, n, m);
|
if(m) paths = get_random_paths(paths, n, m);
|
||||||
@ -731,7 +757,7 @@ data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int
|
|||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
|
data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h)
|
||||||
{
|
{
|
||||||
if(m) paths = get_random_paths(paths, n, m);
|
if(m) paths = get_random_paths(paths, n, m);
|
||||||
data d = {0};
|
data d = {0};
|
||||||
@ -742,6 +768,7 @@ data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
|
|||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
|
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
|
||||||
{
|
{
|
||||||
data d = {0};
|
data d = {0};
|
||||||
@ -753,6 +780,7 @@ data load_data_study(char **paths, int n, int m, char **labels, int k, int min,
|
|||||||
if(m) free(paths);
|
if(m) free(paths);
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
||||||
{
|
{
|
||||||
@ -772,7 +800,7 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
|||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
image im = load_image_color(paths[i], 0, 0);
|
image im = load_image_color(paths[i], 0, 0);
|
||||||
image crop = random_crop_image(im, w*scale, h*scale);
|
image crop = random_crop_image(im, w*scale, h*scale);
|
||||||
int flip = rand_r(&data_seed)%2;
|
int flip = rand()%2;
|
||||||
if (flip) flip_image(crop);
|
if (flip) flip_image(crop);
|
||||||
image resize = resize_image(crop, w, h);
|
image resize = resize_image(crop, w, h);
|
||||||
d.X.vals[i] = resize.data;
|
d.X.vals[i] = resize.data;
|
||||||
@ -837,7 +865,6 @@ data concat_datas(data *d, int n)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
data out = {0};
|
data out = {0};
|
||||||
out.shallow = 1;
|
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
data new = concat_data(d[i], out);
|
data new = concat_data(d[i], out);
|
||||||
free_data(out);
|
free_data(out);
|
||||||
@ -895,7 +922,7 @@ void get_random_batch(data d, int n, float *X, float *y)
|
|||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
for(j = 0; j < n; ++j){
|
for(j = 0; j < n; ++j){
|
||||||
int index = rand_r(&data_seed)%d.X.rows;
|
int index = rand()%d.X.rows;
|
||||||
memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float));
|
memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float));
|
||||||
memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float));
|
memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float));
|
||||||
}
|
}
|
||||||
@ -1008,7 +1035,7 @@ void randomize_data(data d)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for(i = d.X.rows-1; i > 0; --i){
|
for(i = d.X.rows-1; i > 0; --i){
|
||||||
int index = rand_r(&data_seed)%i;
|
int index = rand()%i;
|
||||||
float *swap = d.X.vals[index];
|
float *swap = d.X.vals[index];
|
||||||
d.X.vals[index] = d.X.vals[i];
|
d.X.vals[index] = d.X.vals[i];
|
||||||
d.X.vals[i] = swap;
|
d.X.vals[i] = swap;
|
||||||
@ -1043,6 +1070,19 @@ void normalize_data_rows(data d)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data get_data_part(data d, int part, int total)
|
||||||
|
{
|
||||||
|
data p = {0};
|
||||||
|
p.shallow = 1;
|
||||||
|
p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total;
|
||||||
|
p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total;
|
||||||
|
p.X.cols = d.X.cols;
|
||||||
|
p.y.cols = d.y.cols;
|
||||||
|
p.X.vals = d.X.vals + d.X.rows * part / total;
|
||||||
|
p.y.vals = d.y.vals + d.y.rows * part / total;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
data get_random_data(data d, int num)
|
data get_random_data(data d, int num)
|
||||||
{
|
{
|
||||||
data r = {0};
|
data r = {0};
|
||||||
|
10
src/data.h
10
src/data.h
@ -6,8 +6,6 @@
|
|||||||
#include "list.h"
|
#include "list.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
|
||||||
extern unsigned int data_seed;
|
|
||||||
|
|
||||||
static inline float distance_from_edge(int x, int max)
|
static inline float distance_from_edge(int x, int max)
|
||||||
{
|
{
|
||||||
int dx = (max/2) - x;
|
int dx = (max/2) - x;
|
||||||
@ -23,7 +21,6 @@ typedef struct{
|
|||||||
int w, h;
|
int w, h;
|
||||||
matrix X;
|
matrix X;
|
||||||
matrix y;
|
matrix y;
|
||||||
int *indexes;
|
|
||||||
int shallow;
|
int shallow;
|
||||||
int *num_boxes;
|
int *num_boxes;
|
||||||
box **boxes;
|
box **boxes;
|
||||||
@ -34,6 +31,7 @@ typedef enum {
|
|||||||
} data_type;
|
} data_type;
|
||||||
|
|
||||||
typedef struct load_args{
|
typedef struct load_args{
|
||||||
|
int threads;
|
||||||
char **paths;
|
char **paths;
|
||||||
char *path;
|
char *path;
|
||||||
int n;
|
int n;
|
||||||
@ -70,17 +68,18 @@ typedef struct{
|
|||||||
|
|
||||||
void free_data(data d);
|
void free_data(data d);
|
||||||
|
|
||||||
|
pthread_t load_data(load_args args);
|
||||||
|
|
||||||
pthread_t load_data_in_thread(load_args args);
|
pthread_t load_data_in_thread(load_args args);
|
||||||
|
|
||||||
void print_letters(float *pred, int n);
|
void print_letters(float *pred, int n);
|
||||||
data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
|
data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
|
||||||
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
|
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
|
||||||
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
|
data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
|
||||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
|
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
|
||||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
||||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
|
||||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||||
data load_go(char *filename);
|
data load_go(char *filename);
|
||||||
|
|
||||||
@ -93,6 +92,7 @@ data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int
|
|||||||
list *get_paths(char *filename);
|
list *get_paths(char *filename);
|
||||||
char **get_labels(char *filename);
|
char **get_labels(char *filename);
|
||||||
void get_random_batch(data d, int n, float *X, float *y);
|
void get_random_batch(data d, int n, float *X, float *y);
|
||||||
|
data get_data_part(data d, int part, int total);
|
||||||
data get_random_data(data d, int num);
|
data get_random_data(data d, int num);
|
||||||
void get_next_batch(data d, int n, int offset, float *X, float *y);
|
void get_next_batch(data d, int n, int offset, float *X, float *y);
|
||||||
data load_categorical_data_csv(char *filename, int target, int k);
|
data load_categorical_data_csv(char *filename, int target, int k);
|
||||||
|
@ -17,7 +17,6 @@ void train_detector(char *cfgfile, char *weightfile)
|
|||||||
char *train_images = "/data/voc/train.txt";
|
char *train_images = "/data/voc/train.txt";
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char *backup_directory = "/home/pjreddie/backup/";
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
data_seed = time(0);
|
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
|
@ -6,7 +6,6 @@ char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"};
|
|||||||
|
|
||||||
void train_dice(char *cfgfile, char *weightfile)
|
void train_dice(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
@ -27,7 +26,7 @@ void train_dice(char *cfgfile, char *weightfile)
|
|||||||
while(1){
|
while(1){
|
||||||
++i;
|
++i;
|
||||||
time=clock();
|
time=clock();
|
||||||
data train = load_data(paths, imgs, plist->size, labels, 6, net.w, net.h);
|
data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h);
|
||||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||||
|
|
||||||
time=clock();
|
time=clock();
|
||||||
@ -60,7 +59,7 @@ void validate_dice(char *filename, char *weightfile)
|
|||||||
int m = plist->size;
|
int m = plist->size;
|
||||||
free_list(plist);
|
free_list(plist);
|
||||||
|
|
||||||
data val = load_data(paths, m, 0, labels, 6, net.w, net.h);
|
data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h);
|
||||||
float *acc = network_accuracies(net, val, 2);
|
float *acc = network_accuracies(net, val, 2);
|
||||||
printf("Validation Accuracy: %f, %d images\n", acc[0], m);
|
printf("Validation Accuracy: %f, %d images\n", acc[0], m);
|
||||||
free_data(val);
|
free_data(val);
|
||||||
|
2
src/go.c
2
src/go.c
@ -116,7 +116,6 @@ void random_go_moves(moves m, float *boards, float *labels, int n)
|
|||||||
|
|
||||||
void train_go(char *cfgfile, char *weightfile)
|
void train_go(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
@ -401,7 +400,6 @@ int generate_move(network net, int player, float *board, int multi, float thresh
|
|||||||
|
|
||||||
void valid_go(char *cfgfile, char *weightfile, int multi)
|
void valid_go(char *cfgfile, char *weightfile, int multi)
|
||||||
{
|
{
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
#include <assert.h>
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "data.h"
|
#include "data.h"
|
||||||
@ -356,6 +357,7 @@ float train_network_sgd(network net, data d, int n)
|
|||||||
|
|
||||||
float train_network(network net, data d)
|
float train_network(network net, data d)
|
||||||
{
|
{
|
||||||
|
assert(d.X.rows % net.batch == 0);
|
||||||
int batch = net.batch;
|
int batch = net.batch;
|
||||||
int n = d.X.rows / batch;
|
int n = d.X.rows / batch;
|
||||||
float *X = calloc(batch*d.X.cols, sizeof(float));
|
float *X = calloc(batch*d.X.cols, sizeof(float));
|
||||||
|
@ -65,7 +65,8 @@ typedef struct network_state {
|
|||||||
} network_state;
|
} network_state;
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
float train_networks(network *nets, int n, data d);
|
float train_networks(network *nets, int n, data d, int interval);
|
||||||
|
void sync_nets(network *nets, int n, int interval);
|
||||||
float train_network_datum_gpu(network net, float *x, float *y);
|
float train_network_datum_gpu(network net, float *x, float *y);
|
||||||
float *network_predict_gpu(network net, float *input);
|
float *network_predict_gpu(network net, float *input);
|
||||||
float * get_network_output_gpu_layer(network net, int i);
|
float * get_network_output_gpu_layer(network net, int i);
|
||||||
|
@ -219,34 +219,32 @@ float train_network_datum_gpu(network net, float *x, float *y)
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
network net;
|
network net;
|
||||||
float *X;
|
data d;
|
||||||
float *y;
|
float *err;
|
||||||
} train_args;
|
} train_args;
|
||||||
|
|
||||||
void *train_thread(void *ptr)
|
void *train_thread(void *ptr)
|
||||||
{
|
{
|
||||||
train_args args = *(train_args*)ptr;
|
train_args args = *(train_args*)ptr;
|
||||||
|
|
||||||
cuda_set_device(args.net.gpu_index);
|
|
||||||
forward_backward_network_gpu(args.net, args.X, args.y);
|
|
||||||
free(ptr);
|
free(ptr);
|
||||||
|
cuda_set_device(args.net.gpu_index);
|
||||||
|
*args.err = train_network(args.net, args.d);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_t train_network_in_thread(network net, float *X, float *y)
|
pthread_t train_network_in_thread(network net, data d, float *err)
|
||||||
{
|
{
|
||||||
pthread_t thread;
|
pthread_t thread;
|
||||||
train_args *ptr = (train_args *)calloc(1, sizeof(train_args));
|
train_args *ptr = (train_args *)calloc(1, sizeof(train_args));
|
||||||
ptr->net = net;
|
ptr->net = net;
|
||||||
ptr->X = X;
|
ptr->d = d;
|
||||||
ptr->y = y;
|
ptr->err = err;
|
||||||
if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed");
|
if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed");
|
||||||
return thread;
|
return thread;
|
||||||
}
|
}
|
||||||
|
|
||||||
void pull_updates(layer l)
|
void pull_updates(layer l)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
|
||||||
if(l.type == CONVOLUTIONAL){
|
if(l.type == CONVOLUTIONAL){
|
||||||
cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
||||||
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.n*l.size*l.size*l.c);
|
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.n*l.size*l.size*l.c);
|
||||||
@ -255,12 +253,10 @@ void pull_updates(layer l)
|
|||||||
cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
|
cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
|
||||||
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
|
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void push_updates(layer l)
|
void push_updates(layer l)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
|
||||||
if(l.type == CONVOLUTIONAL){
|
if(l.type == CONVOLUTIONAL){
|
||||||
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
||||||
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.n*l.size*l.size*l.c);
|
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.n*l.size*l.size*l.c);
|
||||||
@ -269,9 +265,95 @@ void push_updates(layer l)
|
|||||||
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
|
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
|
||||||
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
|
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void update_layer(layer l, network net)
|
||||||
|
{
|
||||||
|
int update_batch = net.batch*net.subdivisions;
|
||||||
|
float rate = get_current_rate(net);
|
||||||
|
if(l.type == CONVOLUTIONAL){
|
||||||
|
update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||||
|
} else if(l.type == DECONVOLUTIONAL){
|
||||||
|
update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay);
|
||||||
|
} else if(l.type == CONNECTED){
|
||||||
|
update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||||
|
} else if(l.type == RNN){
|
||||||
|
update_rnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||||
|
} else if(l.type == GRU){
|
||||||
|
update_gru_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||||
|
} else if(l.type == CRNN){
|
||||||
|
update_crnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||||
|
} else if(l.type == LOCAL){
|
||||||
|
update_local_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void merge_weights(layer l, layer base)
|
||||||
|
{
|
||||||
|
if (l.type == CONVOLUTIONAL) {
|
||||||
|
axpy_cpu(l.n, 1, l.biases, 1, base.biases, 1);
|
||||||
|
axpy_cpu(l.n*l.size*l.size*l.c, 1, l.weights, 1, base.weights, 1);
|
||||||
|
if (l.scales) {
|
||||||
|
axpy_cpu(l.n, 1, l.scales, 1, base.scales, 1);
|
||||||
|
}
|
||||||
|
} else if(l.type == CONNECTED) {
|
||||||
|
axpy_cpu(l.outputs, 1, l.biases, 1, base.biases, 1);
|
||||||
|
axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, base.weights, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void scale_weights(layer l, float s)
|
||||||
|
{
|
||||||
|
if (l.type == CONVOLUTIONAL) {
|
||||||
|
scal_cpu(l.n, s, l.biases, 1);
|
||||||
|
scal_cpu(l.n*l.size*l.size*l.c, s, l.weights, 1);
|
||||||
|
if (l.scales) {
|
||||||
|
scal_cpu(l.n, s, l.scales, 1);
|
||||||
|
}
|
||||||
|
} else if(l.type == CONNECTED) {
|
||||||
|
scal_cpu(l.outputs, s, l.biases, 1);
|
||||||
|
scal_cpu(l.outputs*l.inputs, s, l.weights, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void pull_weights(layer l)
|
||||||
|
{
|
||||||
|
if(l.type == CONVOLUTIONAL){
|
||||||
|
cuda_pull_array(l.biases_gpu, l.biases, l.n);
|
||||||
|
cuda_pull_array(l.weights_gpu, l.weights, l.n*l.size*l.size*l.c);
|
||||||
|
if(l.scales) cuda_pull_array(l.scales_gpu, l.scales, l.n);
|
||||||
|
} else if(l.type == CONNECTED){
|
||||||
|
cuda_pull_array(l.biases_gpu, l.biases, l.outputs);
|
||||||
|
cuda_pull_array(l.weights_gpu, l.weights, l.outputs*l.inputs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void push_weights(layer l)
|
||||||
|
{
|
||||||
|
if(l.type == CONVOLUTIONAL){
|
||||||
|
cuda_push_array(l.biases_gpu, l.biases, l.n);
|
||||||
|
cuda_push_array(l.weights_gpu, l.weights, l.n*l.size*l.size*l.c);
|
||||||
|
if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n);
|
||||||
|
} else if(l.type == CONNECTED){
|
||||||
|
cuda_push_array(l.biases_gpu, l.biases, l.outputs);
|
||||||
|
cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void distribute_weights(layer l, layer base)
|
||||||
|
{
|
||||||
|
if(l.type == CONVOLUTIONAL){
|
||||||
|
cuda_push_array(l.biases_gpu, base.biases, l.n);
|
||||||
|
cuda_push_array(l.weights_gpu, base.weights, l.n*l.size*l.size*l.c);
|
||||||
|
if(base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n);
|
||||||
|
} else if(l.type == CONNECTED){
|
||||||
|
cuda_push_array(l.biases_gpu, base.biases, l.outputs);
|
||||||
|
cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void merge_updates(layer l, layer base)
|
void merge_updates(layer l, layer base)
|
||||||
{
|
{
|
||||||
if (l.type == CONVOLUTIONAL) {
|
if (l.type == CONVOLUTIONAL) {
|
||||||
@ -288,79 +370,110 @@ void merge_updates(layer l, layer base)
|
|||||||
|
|
||||||
void distribute_updates(layer l, layer base)
|
void distribute_updates(layer l, layer base)
|
||||||
{
|
{
|
||||||
if (l.type == CONVOLUTIONAL) {
|
if(l.type == CONVOLUTIONAL){
|
||||||
copy_cpu(l.n, base.bias_updates, 1, l.bias_updates, 1);
|
cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n);
|
||||||
copy_cpu(l.n*l.size*l.size*l.c, base.weight_updates, 1, l.weight_updates, 1);
|
cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.n*l.size*l.size*l.c);
|
||||||
if (l.scale_updates) {
|
if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n);
|
||||||
copy_cpu(l.n, base.scale_updates, 1, l.scale_updates, 1);
|
} else if(l.type == CONNECTED){
|
||||||
}
|
cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs);
|
||||||
} else if(l.type == CONNECTED) {
|
cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs);
|
||||||
copy_cpu(l.outputs, base.bias_updates, 1, l.bias_updates, 1);
|
|
||||||
copy_cpu(l.outputs*l.inputs, base.weight_updates, 1, l.weight_updates, 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void sync_updates(network *nets, int n)
|
void sync_layer(network *nets, int n, int j)
|
||||||
{
|
{
|
||||||
int i,j;
|
//printf("Syncing layer %d\n", j);
|
||||||
int layers = nets[0].n;
|
int i;
|
||||||
network net = nets[0];
|
network net = nets[0];
|
||||||
for (j = 0; j < layers; ++j) {
|
layer base = net.layers[j];
|
||||||
layer base = net.layers[j];
|
cuda_set_device(net.gpu_index);
|
||||||
cuda_set_device(net.gpu_index);
|
pull_weights(base);
|
||||||
pull_updates(base);
|
for (i = 1; i < n; ++i) {
|
||||||
for (i = 1; i < n; ++i) {
|
cuda_set_device(nets[i].gpu_index);
|
||||||
cuda_set_device(nets[i].gpu_index);
|
layer l = nets[i].layers[j];
|
||||||
layer l = nets[i].layers[j];
|
pull_weights(l);
|
||||||
pull_updates(l);
|
merge_weights(l, base);
|
||||||
merge_updates(l, base);
|
|
||||||
}
|
|
||||||
for (i = 1; i < n; ++i) {
|
|
||||||
cuda_set_device(nets[i].gpu_index);
|
|
||||||
layer l = nets[i].layers[j];
|
|
||||||
distribute_updates(l, base);
|
|
||||||
push_updates(l);
|
|
||||||
}
|
|
||||||
cuda_set_device(net.gpu_index);
|
|
||||||
push_updates(base);
|
|
||||||
}
|
}
|
||||||
|
scale_weights(base, 1./n);
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
cuda_set_device(nets[i].gpu_index);
|
cuda_set_device(nets[i].gpu_index);
|
||||||
if(i > 0) nets[i].momentum = 0;
|
layer l = nets[i].layers[j];
|
||||||
update_network_gpu(nets[i]);
|
distribute_weights(l, base);
|
||||||
}
|
}
|
||||||
|
//printf("Done syncing layer %d\n", j);
|
||||||
}
|
}
|
||||||
|
|
||||||
float train_networks(network *nets, int n, data d)
|
typedef struct{
|
||||||
{
|
network *nets;
|
||||||
int batch = nets[0].batch;
|
int n;
|
||||||
assert(batch * n == d.X.rows);
|
int j;
|
||||||
assert(nets[0].subdivisions % n == 0);
|
} sync_args;
|
||||||
float **X = (float **) calloc(n, sizeof(float *));
|
|
||||||
float **y = (float **) calloc(n, sizeof(float *));
|
|
||||||
pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
|
|
||||||
|
|
||||||
|
void *sync_layer_thread(void *ptr)
|
||||||
|
{
|
||||||
|
sync_args args = *(sync_args*)ptr;
|
||||||
|
sync_layer(args.nets, args.n, args.j);
|
||||||
|
free(ptr);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_t sync_layer_in_thread(network *nets, int n, int j)
|
||||||
|
{
|
||||||
|
pthread_t thread;
|
||||||
|
sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args));
|
||||||
|
ptr->nets = nets;
|
||||||
|
ptr->n = n;
|
||||||
|
ptr->j = j;
|
||||||
|
if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed");
|
||||||
|
return thread;
|
||||||
|
}
|
||||||
|
|
||||||
|
void sync_nets(network *nets, int n, int interval)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
int layers = nets[0].n;
|
||||||
|
pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t));
|
||||||
|
|
||||||
|
*nets[0].seen += interval * (n-1) * nets[0].batch * nets[0].subdivisions;
|
||||||
|
for (j = 0; j < n; ++j){
|
||||||
|
*nets[j].seen = *nets[0].seen;
|
||||||
|
}
|
||||||
|
for (j = 0; j < layers; ++j) {
|
||||||
|
threads[j] = sync_layer_in_thread(nets, n, j);
|
||||||
|
}
|
||||||
|
for (j = 0; j < layers; ++j) {
|
||||||
|
pthread_join(threads[j], 0);
|
||||||
|
}
|
||||||
|
free(threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
float train_networks(network *nets, int n, data d, int interval)
|
||||||
|
{
|
||||||
int i;
|
int i;
|
||||||
|
int batch = nets[0].batch;
|
||||||
|
int subdivisions = nets[0].subdivisions;
|
||||||
|
assert(batch * subdivisions * n == d.X.rows);
|
||||||
|
pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
|
||||||
|
float *errors = (float *) calloc(n, sizeof(float));
|
||||||
|
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
X[i] = (float *) calloc(batch*d.X.cols, sizeof(float));
|
data p = get_data_part(d, i, n);
|
||||||
y[i] = (float *) calloc(batch*d.y.cols, sizeof(float));
|
threads[i] = train_network_in_thread(nets[i], p, errors + i);
|
||||||
get_next_batch(d, batch, i*batch, X[i], y[i]);
|
|
||||||
threads[i] = train_network_in_thread(nets[i], X[i], y[i]);
|
|
||||||
}
|
}
|
||||||
for(i = 0; i < n; ++i){
|
for(i = 0; i < n; ++i){
|
||||||
pthread_join(threads[i], 0);
|
pthread_join(threads[i], 0);
|
||||||
*nets[i].seen += n*nets[i].batch;
|
printf("%f\n", errors[i]);
|
||||||
printf("%f\n", get_network_cost(nets[i]) / batch);
|
sum += errors[i];
|
||||||
sum += get_network_cost(nets[i]);
|
}
|
||||||
free(X[i]);
|
if (get_current_batch(nets[0]) % interval == 0) {
|
||||||
free(y[i]);
|
printf("Syncing... ");
|
||||||
|
sync_nets(nets, n, interval);
|
||||||
|
printf("Done!\n");
|
||||||
}
|
}
|
||||||
if (((*nets[0].seen) / nets[0].batch) % nets[0].subdivisions == 0) sync_updates(nets, n);
|
|
||||||
free(X);
|
|
||||||
free(y);
|
|
||||||
free(threads);
|
free(threads);
|
||||||
return (float)sum/(n*batch);
|
free(errors);
|
||||||
|
return (float)sum/(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
float *get_network_output_layer_gpu(network net, int i)
|
float *get_network_output_layer_gpu(network net, int i)
|
||||||
|
@ -954,7 +954,9 @@ void save_connected_weights(layer l, FILE *fp)
|
|||||||
void save_weights_upto(network net, char *filename, int cutoff)
|
void save_weights_upto(network net, char *filename, int cutoff)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
if(net.gpu_index >= 0){
|
||||||
cuda_set_device(net.gpu_index);
|
cuda_set_device(net.gpu_index);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
fprintf(stderr, "Saving weights to %s\n", filename);
|
fprintf(stderr, "Saving weights to %s\n", filename);
|
||||||
FILE *fp = fopen(filename, "w");
|
FILE *fp = fopen(filename, "w");
|
||||||
@ -1120,7 +1122,9 @@ void load_convolutional_weights(layer l, FILE *fp)
|
|||||||
void load_weights_upto(network *net, char *filename, int cutoff)
|
void load_weights_upto(network *net, char *filename, int cutoff)
|
||||||
{
|
{
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
if(net->gpu_index >= 0){
|
||||||
cuda_set_device(net->gpu_index);
|
cuda_set_device(net->gpu_index);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
fprintf(stderr, "Loading weights from %s...", filename);
|
fprintf(stderr, "Loading weights from %s...", filename);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
|
@ -129,7 +129,6 @@ void reset_rnn_state(network net, int b)
|
|||||||
void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized)
|
void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized)
|
||||||
{
|
{
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
data_seed = time(0);
|
|
||||||
unsigned char *text = 0;
|
unsigned char *text = 0;
|
||||||
int *tokens = 0;
|
int *tokens = 0;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
@ -76,7 +76,6 @@ void train_vid_rnn(char *cfgfile, char *weightfile)
|
|||||||
char *train_videos = "data/vid/train.txt";
|
char *train_videos = "data/vid/train.txt";
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char *backup_directory = "/home/pjreddie/backup/";
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
data_seed = time(0);
|
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
|
@ -12,7 +12,6 @@ void train_super(char *cfgfile, char *weightfile)
|
|||||||
char *train_images = "/data/imagenet/imagenet1k.train.list";
|
char *train_images = "/data/imagenet/imagenet1k.train.list";
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char *backup_directory = "/home/pjreddie/backup/";
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
data_seed = time(0);
|
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
|
@ -14,7 +14,6 @@ void train_swag(char *cfgfile, char *weightfile)
|
|||||||
char *train_images = "data/voc.0712.trainval";
|
char *train_images = "data/voc.0712.trainval";
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char *backup_directory = "/home/pjreddie/backup/";
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
data_seed = time(0);
|
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
|
|
||||||
void train_tag(char *cfgfile, char *weightfile, int clear)
|
void train_tag(char *cfgfile, char *weightfile, int clear)
|
||||||
{
|
{
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
|
@ -48,7 +48,6 @@ void train_voxel(char *cfgfile, char *weightfile)
|
|||||||
char *train_images = "/data/imagenet/imagenet1k.train.list";
|
char *train_images = "/data/imagenet/imagenet1k.train.list";
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char *backup_directory = "/home/pjreddie/backup/";
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
data_seed = time(0);
|
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
void train_writing(char *cfgfile, char *weightfile)
|
void train_writing(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char *backup_directory = "/home/pjreddie/backup/";
|
||||||
data_seed = time(0);
|
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
|
@ -18,7 +18,6 @@ void train_yolo(char *cfgfile, char *weightfile)
|
|||||||
char *train_images = "/data/voc/train.txt";
|
char *train_images = "/data/voc/train.txt";
|
||||||
char *backup_directory = "/home/pjreddie/backup/";
|
char *backup_directory = "/home/pjreddie/backup/";
|
||||||
srand(time(0));
|
srand(time(0));
|
||||||
data_seed = time(0);
|
|
||||||
char *base = basecfg(cfgfile);
|
char *base = basecfg(cfgfile);
|
||||||
printf("%s\n", base);
|
printf("%s\n", base);
|
||||||
float avg_loss = -1;
|
float avg_loss = -1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user