@ -0,0 +1,55 @@
# Build configuration for the darknet C project: feature switches followed by
# the compiler/linker flag lists derived from them.
cmake_minimum_required(VERSION 3.5)
project(darknet C)

# Feature switches (1 = enabled, 0 = disabled).
set(GPU 1)
set(CUDNN 0)
set(OPENCV 0)
set(OPENMP 0)
set(DEBUG 1)

include_directories(src include)

# Optimisation flags. In a debug build -O0 -g is appended after -Ofast; GCC
# honours the last -O option given, so the debug level takes effect.
set(OPTS -Ofast)
if(DEBUG)
  list(APPEND OPTS -O0 -g)
endif()

# Tool names and base flag lists.
set(NVCC nvcc)
set(AR ar)
set(ARFLAGS rcs)
set(LDFLAGS -lm -pthread)
set(CFLAGS ${OPTS} -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC)

if(OPENMP)
  list(APPEND CFLAGS -fopenmp)
endif()

if(OPENCV)
  # OPENCV_HOME is read from the environment at configure time. The previous
  # spelling, ${env.OPENCV_HOME}, referenced an ordinary (undefined) CMake
  # variable and therefore always produced "-L/lib".
  if("$ENV{OPENCV_HOME}" STREQUAL "")
    message(WARNING "OPENCV is enabled but the OPENCV_HOME environment variable is empty")
  endif()
  list(APPEND LDFLAGS -L$ENV{OPENCV_HOME}/lib -lopencv_core -lstdc++)
endif()

if(GPU)
  # CUDA headers and libraries are taken from the fixed /usr/local/cuda prefix.
  list(APPEND COMMON -DGPU -I/usr/local/cuda/include/)
  list(APPEND LDFLAGS -lstdc++ -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand)
endif()

if(CUDNN)
  list(APPEND LDFLAGS -lcudnn)
endif()

@ -0,0 +1,6 @@
# The darknet executable: built from the example sources and linked against
# the DarkNet library together with pthread and libm.
add_executable(darknet
  ${EXAMPLE_SOURCES}
)
target_link_libraries(darknet
  DarkNet
  pthread
  m
)

View File

@ -287,7 +287,7 @@ void validate_attention_single(char *datacfg, char *filename, char *weightfile)
copy_cpu(classes, pred, 1, avgs, 1);
top_k(pred + classes, divs*divs, divs*divs, inds);
show_image(crop, "crop");
show_image(crop, "crop", 0);
for(j = 0; j < extra; ++j){
int index = inds[j];
int row = index / divs;
@ -298,7 +298,7 @@ void validate_attention_single(char *datacfg, char *filename, char *weightfile)
image tile = crop_image(crop, x, y, net->w, net->h);
float *pred = network_predict(net,;
axpy_cpu(classes, 1., pred, 1, avgs, 1);
show_image(tile, "tile");
show_image(tile, "tile", 0);
if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1);

View File

@ -9,16 +9,16 @@ void train_swag(char *cfgfile, char *weightfile)
char *base = basecfg(cfgfile);
printf("%s\n", base);
float avg_loss = -1;
network net = parse_network_cfg(cfgfile);
network* net = parse_network_cfg(cfgfile);
load_weights(&net, weightfile);
load_weights(net, weightfile);
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = net.batch*net.subdivisions;
int i = *net.seen/imgs;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
int imgs = net->batch*net->subdivisions;
int i = *net->seen/imgs;
data train, buffer;
layer l = net.layers[net.n - 1];
layer l = net->layers[net->n - 1];
int side = l.side;
int classes = l.classes;
@ -29,8 +29,8 @@ void train_swag(char *cfgfile, char *weightfile)
char **paths = (char **)list_to_array(plist);
load_args args = {0};
args.w = net.w;
args.h = net.h;
args.w = net->w;
args.h = net->h;
args.paths = paths;
args.n = imgs;
args.m = plist->size;
@ -43,7 +43,7 @@ void train_swag(char *cfgfile, char *weightfile)
pthread_t load_thread = load_data_in_thread(args);
clock_t time;
//while(i*imgs < N*120){
while(get_current_batch(net) < net.max_batches){
while(get_current_batch(net) < net->max_batches){
i += 1;
pthread_join(load_thread, 0);

View File

@ -44,13 +44,13 @@ void train_voxel(char *cfgfile, char *weightfile)
char *base = basecfg(cfgfile);
printf("%s\n", base);
float avg_loss = -1;
network net = parse_network_cfg(cfgfile);
network* net = parse_network_cfg(cfgfile);
load_weights(&net, weightfile);
load_weights(net, weightfile);
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = net.batch*net.subdivisions;
int i = *net.seen/imgs;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
int imgs = net->batch*net->subdivisions;
int i = *(net->seen)/imgs;
data train, buffer;
@ -59,8 +59,8 @@ void train_voxel(char *cfgfile, char *weightfile)
char **paths = (char **)list_to_array(plist);
load_args args = {0};
args.w = net.w;
args.h = net.h;
args.w = net->w;
args.h = net->h;
args.scale = 4;
args.paths = paths;
args.n = imgs;
@ -71,7 +71,7 @@ void train_voxel(char *cfgfile, char *weightfile)
pthread_t load_thread = load_data_in_thread(args);
clock_t time;
//while(i*imgs < N*120){
while(get_current_batch(net) < net.max_batches){
while(get_current_batch(net) < net->max_batches){
i += 1;
pthread_join(load_thread, 0);
@ -105,11 +105,11 @@ void train_voxel(char *cfgfile, char *weightfile)
void test_voxel(char *cfgfile, char *weightfile, char *filename)
network net = parse_network_cfg(cfgfile);
network* net = parse_network_cfg(cfgfile);
load_weights(&net, weightfile);
load_weights(net, weightfile);
set_batch_network(&net, 1);
set_batch_network(net, 1);
clock_t time;
@ -126,7 +126,7 @@ void test_voxel(char *cfgfile, char *weightfile, char *filename)
strtok(input, "\n");
image im = load_image_color(input, 0, 0);
resize_network(&net, im.w, im.h);
resize_network(net, im.w, im.h);
printf("%d %d\n", im.w, im.h);
float *X =;

View File

@ -7,12 +7,12 @@ void train_writing(char *cfgfile, char *weightfile)
float avg_loss = -1;
char *base = basecfg(cfgfile);
printf("%s\n", base);
network net = parse_network_cfg(cfgfile);
network* net = parse_network_cfg(cfgfile);
load_weights(&net, weightfile);
load_weights(net, weightfile);
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = net.batch*net.subdivisions;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
int imgs = net->batch*net->subdivisions;
list *plist = get_paths("figures.list");
char **paths = (char **)list_to_array(plist);
clock_t time;
@ -23,8 +23,8 @@ void train_writing(char *cfgfile, char *weightfile)
data train, buffer;
load_args args = {0};
args.w = net.w;
args.h = net.h;
args.w = net->w;
args.h = net->h;
args.out_w = out.w;
args.out_h = out.h;
args.paths = paths;
@ -34,8 +34,8 @@ void train_writing(char *cfgfile, char *weightfile)
args.type = WRITING_DATA;
pthread_t load_thread = load_data_in_thread(args);
int epoch = (*net.seen)/N;
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
int epoch = *(net->seen)/N;
while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
pthread_join(load_thread, 0);
train = buffer;
@ -63,15 +63,15 @@ void train_writing(char *cfgfile, char *weightfile)
if(avg_loss == -1) avg_loss = loss;
avg_loss = avg_loss*.9 + loss*.1;
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*(net->seen))/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
if(get_current_batch(net)%100 == 0){
char buff[256];
sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net));
save_weights(net, buff);
if(*net.seen/N > epoch){
epoch = *net.seen/N;
if(*net->seen/N > epoch){
epoch = *net->seen/N;
char buff[256];
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
save_weights(net, buff);
@ -81,11 +81,11 @@ void train_writing(char *cfgfile, char *weightfile)
void test_writing(char *cfgfile, char *weightfile, char *filename)
network net = parse_network_cfg(cfgfile);
network* net = parse_network_cfg(cfgfile);
load_weights(&net, weightfile);
load_weights(net, weightfile);
set_batch_network(&net, 1);
set_batch_network(net, 1);
clock_t time;
char buff[256];
@ -102,7 +102,7 @@ void test_writing(char *cfgfile, char *weightfile, char *filename)
image im = load_image_color(input, 0, 0);
resize_network(&net, im.w, im.h);
resize_network(net, im.w, im.h);
printf("%d %d %d\n", im.h, im.w, im.c);
float *X =;
@ -114,8 +114,8 @@ void test_writing(char *cfgfile, char *weightfile, char *filename)
image thresh = threshold_image(upsampled, .5);
pred = thresh;
show_image(pred, "prediction");
show_image(im, "orig");
show_image(pred, "prediction", 0);
show_image(im, "orig", 0);
#ifdef OPENCV

@ -0,0 +1,15 @@
# NOTE(review): no matching endif()/else() is visible in this excerpt, so it is
# unclear whether add_library() below belongs to the GPU branch — confirm.
if ( GPU )
# Configure-time diagnostics: announce the GPU build and print the CUDA source list.
message("Compiling for GPU...")
message("CUDA_FILES = ${CUDA_FILES}")
# Define the DarkNet library target from the header, source and CUDA file lists.
add_library(DarkNet ${HEADER_FILES} ${SOURCE_FILES} ${CUDA_FILES})

View File

@ -14,12 +14,12 @@
void swap_binary(convolutional_layer *l)
float *swap = l->weights;
l->weights = l->binary_weights;
l->binary_weights = swap;
float *swap = l->weights;
l->weights = l->binary_weights;
l->binary_weights = swap;
#ifdef GPU
swap = l->weights_gpu;
swap = l->weights_gpu;
l->weights_gpu = l->binary_weights_gpu;
l->binary_weights_gpu = swap;
@ -27,65 +27,65 @@ void swap_binary(convolutional_layer *l)
void binarize_weights(float *weights, int n, int size, float *binary)
int i, f;
for(f = 0; f < n; ++f){
float mean = 0;
for(i = 0; i < size; ++i){
mean += fabs(weights[f*size + i]);
mean = mean / size;
for(i = 0; i < size; ++i){
binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean;
int i, f;
for(f = 0; f < n; ++f){
float mean = 0;
for(i = 0; i < size; ++i){
mean += fabs(weights[f*size + i]);
mean = mean / size;
for(i = 0; i < size; ++i){
binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean;
void binarize_cpu(float *input, int n, float *binary)
int i;
for(i = 0; i < n; ++i){
binary[i] = (input[i] > 0) ? 1 : -1;
int i;
for(i = 0; i < n; ++i){
binary[i] = (input[i] > 0) ? 1 : -1;
void binarize_input(float *input, int n, int size, float *binary)
int i, s;
for(s = 0; s < size; ++s){
float mean = 0;
for(i = 0; i < n; ++i){
mean += fabs(input[i*size + s]);
mean = mean / n;
for(i = 0; i < n; ++i){
binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean;
int i, s;
for(s = 0; s < size; ++s){
float mean = 0;
for(i = 0; i < n; ++i){
mean += fabs(input[i*size + s]);
mean = mean / n;
for(i = 0; i < n; ++i){
binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean;
int convolutional_out_height(convolutional_layer l)
return (l.h + 2*l.pad - l.size) / l.stride + 1;
return (l.h + 2*l.pad - l.size) / l.stride + 1;
int convolutional_out_width(convolutional_layer l)
return (l.w + 2*l.pad - l.size) / l.stride + 1;
return (l.w + 2*l.pad - l.size) / l.stride + 1;
image get_convolutional_image(convolutional_layer l)
return float_to_image(l.out_w,l.out_h,l.out_c,l.output);
return float_to_image(l.out_w,l.out_h,l.out_c,l.output);
image get_convolutional_delta(convolutional_layer l)
return float_to_image(l.out_w,l.out_h,l.out_c,;
return float_to_image(l.out_w,l.out_h,l.out_c,;
static size_t get_workspace_size(layer l){
#ifdef CUDNN
if(gpu_index >= 0){
if(gpu_index >= 0){
size_t most = 0;
size_t s = 0;
@ -115,7 +115,7 @@ static size_t get_workspace_size(layer l){
return most;
return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float);
return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float);
#ifdef GPU
@ -173,93 +173,96 @@ void cudnn_convolutional_setup(layer *l)
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
make_convolutional_layer(int batch, int h, int w, int c, int n, int groups,
int size, int stride, int padding, ACTIVATION activation,
int batch_normalize, int binary, int xnor, int adam)
int i;
convolutional_layer l = {0};
int i;
convolutional_layer l = {0};
l.groups = groups;
l.h = h;
l.w = w;
l.c = c;
l.n = n;
l.binary = binary;
l.xnor = xnor;
l.batch = batch;
l.stride = stride;
l.size = size;
l.pad = padding;
l.batch_normalize = batch_normalize;
l.groups = groups;
l.h = h;
l.w = w;
l.c = c;
l.n = n;
l.binary = binary;
l.xnor = xnor;
l.batch = batch;
l.stride = stride;
l.size = size;
l.pad = padding;
l.batch_normalize = batch_normalize;
l.weights = calloc(c/groups*n*size*size, sizeof(float));
l.weight_updates = calloc(c/groups*n*size*size, sizeof(float));
l.weights = calloc(c/groups*n*size*size, sizeof(float));
l.weight_updates = calloc(c/groups*n*size*size, sizeof(float));
l.biases = calloc(n, sizeof(float));
l.bias_updates = calloc(n, sizeof(float));
l.biases = calloc(n, sizeof(float));
l.bias_updates = calloc(n, sizeof(float));
l.nweights = c/groups*n*size*size;
l.nbiases = n;
l.nweights = c/groups*n*size*size;
l.nbiases = n;
// float scale = 1./sqrt(size*size*c);
float scale = sqrt(2./(size*size*c/l.groups));
//printf("convscale %f\n", scale);
//scale = .02;
//for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal();
int out_w = convolutional_out_width(l);
int out_h = convolutional_out_height(l);
l.out_h = out_h;
l.out_w = out_w;
l.out_c = n;
l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = l.w * l.h * l.c;
// float scale = 1./sqrt(size*size*c);
float scale = sqrt(2./(size*size*c/l.groups));
//printf("convscale %f\n", scale);
//scale = .02;
//for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal();
int out_w = convolutional_out_width(l);
int out_h = convolutional_out_height(l);
l.out_h = out_h;
l.out_w = out_w;
l.out_c = n;
l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = l.w * l.h * l.c;
l.output = calloc(l.batch*l.outputs, sizeof(float)); = calloc(l.batch*l.outputs, sizeof(float));
l.output = calloc(l.batch*l.outputs, sizeof(float)); = calloc(l.batch*l.outputs, sizeof(float));
l.forward = forward_convolutional_layer;
l.backward = backward_convolutional_layer;
l.update = update_convolutional_layer;
l.binary_weights = calloc(l.nweights, sizeof(float));
l.cweights = calloc(l.nweights, sizeof(char));
l.scales = calloc(n, sizeof(float));
l.binary_weights = calloc(l.nweights, sizeof(float));
l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
l.forward = forward_convolutional_layer;
l.backward = backward_convolutional_layer;
l.update = update_convolutional_layer;
l.binary_weights = calloc(l.nweights, sizeof(float));
l.cweights = calloc(l.nweights, sizeof(char));
l.scales = calloc(n, sizeof(float));
l.binary_weights = calloc(l.nweights, sizeof(float));
l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
l.scales = calloc(n, sizeof(float));
l.scale_updates = calloc(n, sizeof(float));
for(i = 0; i < n; ++i){
l.scales[i] = 1;
l.scales = calloc(n, sizeof(float));
l.scale_updates = calloc(n, sizeof(float));
for(i = 0; i < n; ++i){
l.scales[i] = 1;
l.mean = calloc(n, sizeof(float));
l.variance = calloc(n, sizeof(float));
l.mean = calloc(n, sizeof(float));
l.variance = calloc(n, sizeof(float));
l.mean_delta = calloc(n, sizeof(float));
l.variance_delta = calloc(n, sizeof(float));
l.mean_delta = calloc(n, sizeof(float));
l.variance_delta = calloc(n, sizeof(float));
l.rolling_mean = calloc(n, sizeof(float));
l.rolling_variance = calloc(n, sizeof(float));
l.x = calloc(l.batch*l.outputs, sizeof(float));
l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
l.m = calloc(l.nweights, sizeof(float));
l.v = calloc(l.nweights, sizeof(float));
l.bias_m = calloc(n, sizeof(float));
l.scale_m = calloc(n, sizeof(float));
l.bias_v = calloc(n, sizeof(float));
l.scale_v = calloc(n, sizeof(float));
l.rolling_mean = calloc(n, sizeof(float));
l.rolling_variance = calloc(n, sizeof(float));
l.x = calloc(l.batch*l.outputs, sizeof(float));
l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
l.m = calloc(l.nweights, sizeof(float));
l.v = calloc(l.nweights, sizeof(float));
l.bias_m = calloc(n, sizeof(float));
l.scale_m = calloc(n, sizeof(float));
l.bias_v = calloc(n, sizeof(float));
l.scale_v = calloc(n, sizeof(float));
#ifdef GPU
l.forward_gpu = forward_convolutional_layer_gpu;
l.forward_gpu = forward_convolutional_layer_gpu;
l.backward_gpu = backward_convolutional_layer_gpu;
l.update_gpu = update_convolutional_layer_gpu;
@ -319,27 +322,27 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
l.workspace_size = get_workspace_size(l);
l.activation = activation;
l.workspace_size = get_workspace_size(l);
l.activation = activation;
fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.);
fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.);
return l;
return l;
void denormalize_convolutional_layer(convolutional_layer l)
int i, j;
for(i = 0; i < l.n; ++i){
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){
l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale;
l.biases[i] -= l.rolling_mean[i] * scale;
l.scales[i] = 1;
l.rolling_mean[i] = 0;
l.rolling_variance[i] = 1;
int i, j;
for(i = 0; i < l.n; ++i){
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){
l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale;
l.biases[i] -= l.rolling_mean[i] * scale;
l.scales[i] = 1;
l.rolling_mean[i] = 0;
l.rolling_variance[i] = 1;
@ -369,26 +372,26 @@ void test_convolutional_layer()
void resize_convolutional_layer(convolutional_layer *l, int w, int h)
l->w = w;
l->h = h;
int out_w = convolutional_out_width(*l);
int out_h = convolutional_out_height(*l);
l->w = w;
l->h = h;
int out_w = convolutional_out_width(*l);
int out_h = convolutional_out_height(*l);
l->out_w = out_w;
l->out_h = out_h;
l->out_w = out_w;
l->out_h = out_h;
l->outputs = l->out_h * l->out_w * l->out_c;
l->inputs = l->w * l->h * l->c;
l->outputs = l->out_h * l->out_w * l->out_c;
l->inputs = l->w * l->h * l->c;
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
#ifdef GPU
l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
@ -405,218 +408,218 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
l->workspace_size = get_workspace_size(*l);
l->workspace_size = get_workspace_size(*l);
void add_bias(float *output, float *biases, int batch, int n, int size)
int i,j,b;
for(b = 0; b < batch; ++b){
for(i = 0; i < n; ++i){
for(j = 0; j < size; ++j){
output[(b*n + i)*size + j] += biases[i];
int i,j,b;
for(b = 0; b < batch; ++b){
for(i = 0; i < n; ++i){
for(j = 0; j < size; ++j){
output[(b*n + i)*size + j] += biases[i];
void scale_bias(float *output, float *scales, int batch, int n, int size)
int i,j,b;
for(b = 0; b < batch; ++b){
for(i = 0; i < n; ++i){
for(j = 0; j < size; ++j){
output[(b*n + i)*size + j] *= scales[i];
int i,j,b;
for(b = 0; b < batch; ++b){
for(i = 0; i < n; ++i){
for(j = 0; j < size; ++j){
output[(b*n + i)*size + j] *= scales[i];
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
int i,b;
for(b = 0; b < batch; ++b){
for(i = 0; i < n; ++i){
bias_updates[i] += sum_array(delta+size*(i+b*n), size);
int i,b;
for(b = 0; b < batch; ++b){
for(i = 0; i < n; ++i){
bias_updates[i] += sum_array(delta+size*(i+b*n), size);
void forward_convolutional_layer(convolutional_layer l, network net)
int i, j;
int i, j;
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
net.input = l.binary_input;
binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
net.input = l.binary_input;
int m = l.n/l.groups;
int k = l.size*l.size*l.c/l.groups;
int n = l.out_w*l.out_h;
for(i = 0; i < l.batch; ++i){
for(j = 0; j < l.groups; ++j){
float *a = l.weights + j*l.nweights/l.groups;
float *b = net.workspace;
float *c = l.output + (i*l.groups + j)*n*m;
float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
int m = l.n/l.groups;
int k = l.size*l.size*l.c/l.groups;
int n = l.out_w*l.out_h;
for(i = 0; i < l.batch; ++i){
for(j = 0; j < l.groups; ++j){
float *a = l.weights + j*l.nweights/l.groups;
float *b = net.workspace;
float *c = l.output + (i*l.groups + j)*n*m;
float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
if (l.size == 1) {
b = im;
} else {
im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
if (l.size == 1) {
b = im;
} else {
im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
forward_batchnorm_layer(l, net);
} else {
add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
forward_batchnorm_layer(l, net);
} else {
add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
activate_array(l.output, l.outputs*l.batch, l.activation);
if(l.binary || l.xnor) swap_binary(&l);
activate_array(l.output, l.outputs*l.batch, l.activation);
if(l.binary || l.xnor) swap_binary(&l);
void backward_convolutional_layer(convolutional_layer l, network net)
int i, j;
int m = l.n/l.groups;
int n = l.size*l.size*l.c/l.groups;
int k = l.out_w*l.out_h;
int i, j;
int m = l.n/l.groups;
int n = l.size*l.size*l.c/l.groups;
int k = l.out_w*l.out_h;
gradient_array(l.output, l.outputs*l.batch, l.activation,;
gradient_array(l.output, l.outputs*l.batch, l.activation,;
backward_batchnorm_layer(l, net);
} else {
backward_bias(l.bias_updates,, l.batch, l.n, k);
backward_batchnorm_layer(l, net);
} else {
backward_bias(l.bias_updates,, l.batch, l.n, k);
for(i = 0; i < l.batch; ++i){
for(j = 0; j < l.groups; ++j){
float *a = + (i*l.groups + j)*m*k;
float *b = net.workspace;
float *c = l.weight_updates + j*l.nweights/l.groups;
for(i = 0; i < l.batch; ++i){
for(j = 0; j < l.groups; ++j){
float *a = + (i*l.groups + j)*m*k;
float *b = net.workspace;
float *c = l.weight_updates + j*l.nweights/l.groups;
float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
float *imd = + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
float *imd = + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
if(l.size == 1){
b = im;
} else {
im2col_cpu(im, l.c/l.groups, l.h, l.w,
l.size, l.stride, l.pad, b);
if(l.size == 1){
b = im;
} else {
im2col_cpu(im, l.c/l.groups, l.h, l.w,
l.size, l.stride, l.pad, b);
if ( {
a = l.weights + j*l.nweights/l.groups;
b = + (i*l.groups + j)*m*k;
c = net.workspace;
if (l.size == 1) {
c = imd;
if ( {
a = l.weights + j*l.nweights/l.groups;
b = + (i*l.groups + j)*m*k;
c = net.workspace;
if (l.size == 1) {
c = imd;
if (l.size != 1) {
col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd);
if (l.size != 1) {
col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd);
void update_convolutional_layer(convolutional_layer l, update_args a)
float learning_rate = a.learning_rate*l.learning_rate_scale;
float momentum = a.momentum;
float decay = a.decay;
int batch = a.batch;
float learning_rate = a.learning_rate*l.learning_rate_scale;
float momentum = a.momentum;
float decay = a.decay;
int batch = a.batch;
axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
scal_cpu(l.n, momentum, l.bias_updates, 1);
axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
scal_cpu(l.n, momentum, l.bias_updates, 1);
axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
scal_cpu(l.n, momentum, l.scale_updates, 1);
axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
scal_cpu(l.n, momentum, l.scale_updates, 1);
axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1);
axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
scal_cpu(l.nweights, momentum, l.weight_updates, 1);
axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1);
axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
scal_cpu(l.nweights, momentum, l.weight_updates, 1);
image get_convolutional_weight(convolutional_layer l, int i)
int h = l.size;
int w = l.size;
int c = l.c/l.groups;
return float_to_image(w,h,c,l.weights+i*h*w*c);
int h = l.size;
int w = l.size;
int c = l.c/l.groups;
return float_to_image(w,h,c,l.weights+i*h*w*c);
void rgbgr_weights(convolutional_layer l)
int i;
for(i = 0; i < l.n; ++i){
image im = get_convolutional_weight(l, i);
if (im.c == 3) {
int i;
for(i = 0; i < l.n; ++i){
image im = get_convolutional_weight(l, i);
if (im.c == 3) {
void rescale_weights(convolutional_layer l, float scale, float trans)
int i;
for(i = 0; i < l.n; ++i){
image im = get_convolutional_weight(l, i);
if (im.c == 3) {
scale_image(im, scale);
float sum = sum_array(, im.w*im.h*im.c);
l.biases[i] += sum*trans;
int i;
for(i = 0; i < l.n; ++i){
image im = get_convolutional_weight(l, i);
if (im.c == 3) {
scale_image(im, scale);
float sum = sum_array(, im.w*im.h*im.c);
l.biases[i] += sum*trans;
image *get_weights(convolutional_layer l)
image *weights = calloc(l.n, sizeof(image));
int i;
for(i = 0; i < l.n; ++i){
weights[i] = copy_image(get_convolutional_weight(l, i));
char buff[256];
sprintf(buff, "filter%d", i);
save_image(weights[i], buff);
return weights;
image *weights = calloc(l.n, sizeof(image));
int i;
for(i = 0; i < l.n; ++i){
weights[i] = copy_image(get_convolutional_weight(l, i));
char buff[256];
sprintf(buff, "filter%d", i);
save_image(weights[i], buff);
return weights;
image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights)
image *single_weights = get_weights(l);
show_images(single_weights, l.n, window);
image *single_weights = get_weights(l);
show_images(single_weights, l.n, window);
image delta = get_convolutional_image(l);
image dc = collapse_image_layers(delta, 1);
char buff[256];
sprintf(buff, "%s: Output", window);
//show_image(dc, buff);
//save_image(dc, buff);
return single_weights;
image delta = get_convolutional_image(l);
image dc = collapse_image_layers(delta, 1);
char buff[256];
sprintf(buff, "%s: Output", window);
//show_image(dc, buff);
//save_image(dc, buff);
return single_weights;

View File

@ -739,7 +739,7 @@ int is_network(section *s)
|| strcmp(s->type, "[network]")==0);
network *parse_network_cfg(char *filename)
network* parse_network_cfg(char *filename)
list *sections = read_cfg(filename);
node *n = sections->front;