:crowmageddon: probably broke everything :crowmageddon:

Joseph Redmon 2017-04-09 19:56:42 -07:00
parent 179ed8ec76
commit 8d9ed0a1d6
66 changed files with 1079 additions and 1012 deletions
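Nearly every hunk below is the same mechanical API change: layer forward/backward functions used to receive a per-call network_state and now take the network struct itself, with state.input/state.delta becoming net.input/net.delta (and net.input_gpu/net.delta_gpu/net.workspace on the GPU paths). A minimal sketch of the pattern on a hypothetical pass-through layer, not code from this commit:

/* before: a transient state struct carried input, delta, truth, and the train flag */
void forward_example_layer(layer l, network_state state)
{
    copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
}

/* after: the network itself carries those fields and is passed straight through */
void forward_example_layer(layer l, network net)
{
    copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
}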

View File

@@ -1,7 +1,7 @@
classes= 80
train = /home/pjreddie/data/coco/trainvalno5k.txt
valid = coco_testdev
#valid = data/coco_val_5k.list
#valid = coco_testdev
valid = data/coco_val_5k.list
names = data/coco.names
backup = /home/pjreddie/backup/
eval=coco

View File

@@ -5,8 +5,8 @@ subdivisions=1
# Training
# batch=64
# subdivisions=8
height=416
width=416
height=608
width=608
channels=3
momentum=0.9
decay=0.0005
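Worth noting on the 416 to 608 change: YOLO input sides must stay multiples of 32 because the network downsamples by a factor of 32, so this moves the detection grid from 416/32 = 13x13 to 608/32 = 19x19, trading inference speed for resolution.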

View File

@@ -35,29 +35,29 @@ layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
return l;
}
void forward_activation_layer(layer l, network_state state)
void forward_activation_layer(layer l, network net)
{
copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
activate_array(l.output, l.outputs*l.batch, l.activation);
}
void backward_activation_layer(layer l, network_state state)
void backward_activation_layer(layer l, network net)
{
gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1);
copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
}
#ifdef GPU
void forward_activation_layer_gpu(layer l, network_state state)
void forward_activation_layer_gpu(layer l, network net)
{
copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1);
copy_ongpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
void backward_activation_layer_gpu(layer l, network_state state)
void backward_activation_layer_gpu(layer l, network net)
{
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif

View File

@@ -7,12 +7,12 @@
layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
void forward_activation_layer(layer l, network_state state);
void backward_activation_layer(layer l, network_state state);
void forward_activation_layer(layer l, network net);
void backward_activation_layer(layer l, network net);
#ifdef GPU
void forward_activation_layer_gpu(layer l, network_state state);
void backward_activation_layer_gpu(layer l, network_state state);
void forward_activation_layer_gpu(layer l, network net);
void backward_activation_layer_gpu(layer l, network net);
#endif
#endif

View File

@@ -37,7 +37,7 @@ void resize_avgpool_layer(avgpool_layer *l, int w, int h)
l->inputs = h*w*l->c;
}
void forward_avgpool_layer(const avgpool_layer l, network_state state)
void forward_avgpool_layer(const avgpool_layer l, network net)
{
int b,i,k;
@@ -47,14 +47,14 @@ void forward_avgpool_layer(const avgpool_layer l, network_state state)
l.output[out_index] = 0;
for(i = 0; i < l.h*l.w; ++i){
int in_index = i + l.h*l.w*(k + b*l.c);
l.output[out_index] += state.input[in_index];
l.output[out_index] += net.input[in_index];
}
l.output[out_index] /= l.h*l.w;
}
}
}
void backward_avgpool_layer(const avgpool_layer l, network_state state)
void backward_avgpool_layer(const avgpool_layer l, network net)
{
int b,i,k;
@@ -63,7 +63,7 @@ void backward_avgpool_layer(const avgpool_layer l, network_state state)
int out_index = k + b*l.c;
for(i = 0; i < l.h*l.w; ++i){
int in_index = i + l.h*l.w*(k + b*l.c);
state.delta[in_index] += l.delta[out_index] / (l.h*l.w);
net.delta[in_index] += l.delta[out_index] / (l.h*l.w);
}
}
}
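As context for the rename above: average pooling collapses each h*w channel map to a single value, and the backward pass spreads the incoming gradient back uniformly. For a 2x2 channel holding {1, 2, 3, 4} the forward output is (1+2+3+4)/4 = 2.5, and each input position receives delta/4 on the way back.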

View File

@@ -11,12 +11,12 @@ typedef layer avgpool_layer;
image get_avgpool_image(avgpool_layer l);
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c);
void resize_avgpool_layer(avgpool_layer *l, int w, int h);
void forward_avgpool_layer(const avgpool_layer l, network_state state);
void backward_avgpool_layer(const avgpool_layer l, network_state state);
void forward_avgpool_layer(const avgpool_layer l, network net);
void backward_avgpool_layer(const avgpool_layer l, network net);
#ifdef GPU
void forward_avgpool_layer_gpu(avgpool_layer l, network_state state);
void backward_avgpool_layer_gpu(avgpool_layer l, network_state state);
void forward_avgpool_layer_gpu(avgpool_layer l, network net);
void backward_avgpool_layer_gpu(avgpool_layer l, network net);
#endif
#endif

View File

@@ -43,19 +43,19 @@ __global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float
}
}
extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state)
extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net)
{
size_t n = layer.c*layer.batch;
forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, state.input, layer.output_gpu);
forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu);
check_error(cudaPeekAtLastError());
}
extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state)
extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net)
{
size_t n = layer.c*layer.batch;
backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu);
backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu);
check_error(cudaPeekAtLastError());
}

View File

@@ -132,14 +132,15 @@ void resize_batchnorm_layer(layer *layer, int w, int h)
fprintf(stderr, "Not implemented\n");
}
void forward_batchnorm_layer(layer l, network_state state)
void forward_batchnorm_layer(layer l, network net)
{
if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
if(l.type == CONNECTED){
l.out_c = l.outputs;
l.out_h = l.out_w = 1;
}
if(state.train){
copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
if(net.train){
mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);
@@ -148,7 +149,6 @@ void forward_batchnorm_layer(layer l, network_state state)
scal_cpu(l.out_c, .99, l.rolling_variance, 1);
axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);
copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);
copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
} else {
@@ -158,8 +158,12 @@ void forward_batchnorm_layer(layer l, network_state state)
add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w);
}
void backward_batchnorm_layer(const layer l, network_state state)
void backward_batchnorm_layer(layer l, network net)
{
if(!net.train){
l.mean = l.rolling_mean;
l.variance = l.rolling_variance;
}
backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h);
backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates);
@@ -168,7 +172,7 @@ void backward_batchnorm_layer(const layer l, network_state state)
mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta);
variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta);
normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1);
if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
}
#ifdef GPU
@@ -186,35 +190,35 @@ void push_batchnorm_layer(layer l)
cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c);
}
void forward_batchnorm_layer_gpu(layer l, network_state state)
void forward_batchnorm_layer_gpu(layer l, network net)
{
if(l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1);
if(l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
if(l.type == CONNECTED){
l.out_c = l.outputs;
l.out_h = l.out_w = 1;
}
if (state.train) {
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
if (net.train) {
#ifdef CUDNN
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
float one = 1;
float zero = 0;
cudnnBatchNormalizationForwardTraining(cudnn_handle(),
CUDNN_BATCHNORM_SPATIAL,
&one,
&zero,
l.dstTensorDesc,
l.x_gpu,
l.dstTensorDesc,
l.output_gpu,
l.normTensorDesc,
l.scales_gpu,
l.biases_gpu,
.01,
l.rolling_mean_gpu,
l.rolling_variance_gpu,
.00001,
l.mean_gpu,
l.variance_gpu);
CUDNN_BATCHNORM_SPATIAL,
&one,
&zero,
l.dstTensorDesc,
l.x_gpu,
l.dstTensorDesc,
l.output_gpu,
l.normTensorDesc,
l.scales_gpu,
l.biases_gpu,
.01,
l.rolling_mean_gpu,
l.rolling_variance_gpu,
.00001,
l.mean_gpu,
l.variance_gpu);
#else
fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);
fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu);
@@ -239,8 +243,12 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
}
void backward_batchnorm_layer_gpu(const layer l, network_state state)
void backward_batchnorm_layer_gpu(layer l, network net)
{
if(!net.train){
l.mean_gpu = l.rolling_mean_gpu;
l.variance_gpu = l.rolling_variance_gpu;
}
#ifdef CUDNN
float one = 1;
float zero = 0;
@@ -274,6 +282,6 @@ void backward_batchnorm_layer_gpu(const layer l, network_state state)
fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu);
normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
#endif
if(l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
if(l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
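Beyond the rename, this file picks up a behavioral fix: backward_batchnorm_layer and its GPU twin drop the const qualifier and, when net.train is false, point l.mean/l.variance at the rolling statistics, matching what the forward pass normalizes with at inference. For reference, a hedged sketch of that inference-time transform for one image and one feature map f (the epsilon constant is an assumption, mirroring darknet's normalize_cpu):

int i;
float eps = .000001;   /* assumption: mirrors normalize_cpu's constant */
for(i = 0; i < l.out_h*l.out_w; ++i){
    int idx = f*l.out_h*l.out_w + i;   /* f = feature map index */
    float x_hat = (l.output[idx] - l.rolling_mean[f])/sqrt(l.rolling_variance[f] + eps);
    l.output[idx] = l.scales[f]*x_hat + l.biases[f];
}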

View File

@@ -6,12 +6,12 @@
#include "network.h"
layer make_batchnorm_layer(int batch, int w, int h, int c);
void forward_batchnorm_layer(layer l, network_state state);
void backward_batchnorm_layer(layer l, network_state state);
void forward_batchnorm_layer(layer l, network net);
void backward_batchnorm_layer(layer l, network net);
#ifdef GPU
void forward_batchnorm_layer_gpu(layer l, network_state state);
void backward_batchnorm_layer_gpu(layer l, network_state state);
void forward_batchnorm_layer_gpu(layer l, network net);
void backward_batchnorm_layer_gpu(layer l, network net);
void pull_batchnorm_layer(layer l);
void push_batchnorm_layer(layer l);
#endif

View File

@@ -145,7 +145,7 @@ __global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float
int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (index >= N) return;
x[index] = x[index] - (rate * sqrt(1.-pow(B2, t)) / (1.-pow(B1, t)) * m[index] / (sqrt(v[index]) + eps));
x[index] = x[index] + (rate * sqrt(1.-pow(B2, t)) / (1.-pow(B1, t)) * m[index] / (sqrt(v[index]) + eps));
//if(index == 0) printf("%f %f %f %f\n", m[index], v[index], (rate * sqrt(1.-pow(B2, t)) / (1.-pow(B1, t)) * m[index] / (sqrt(v[index]) + eps)));
}
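The substantive change in this kernel is the sign: the Adam step is now added to x rather than subtracted. That reads as consistent with the update convention used elsewhere in this commit, where the d/weight_updates buffers accumulate the direction the weights should move (weight decay, for instance, is folded in as axpy(n, -decay*batch, w, 1, d, 1)). The step itself is the bias-corrected Adam update:

x += rate * sqrt(1 - B2^t)/(1 - B1^t) * m/(sqrt(v) + eps)

with m and v the first- and second-moment estimates and t the timestep.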

View File

@@ -123,7 +123,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
save_weights(net, buff);
}
if(get_current_batch(net)%100 == 0){
if(get_current_batch(net)%1000 == 0){
char buff[256];
sprintf(buff, "%s/%s.backup",backup_directory,base);
save_weights(net, buff);

View File

@@ -125,19 +125,19 @@ void update_connected_layer(connected_layer l, int batch, float learning_rate, f
scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1);
}
void forward_connected_layer(connected_layer l, network_state state)
void forward_connected_layer(connected_layer l, network net)
{
int i;
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
int m = l.batch;
int k = l.inputs;
int n = l.outputs;
float *a = state.input;
float *a = net.input;
float *b = l.weights;
float *c = l.output;
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
if(l.batch_normalize){
if(state.train){
if(net.train){
mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);
@@ -160,7 +160,7 @@ void forward_connected_layer(connected_layer l, network_state state)
activate_array(l.output, l.outputs*l.batch, l.activation);
}
void backward_connected_layer(connected_layer l, network_state state)
void backward_connected_layer(connected_layer l, network net)
{
int i;
gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
@@ -181,7 +181,7 @@ void backward_connected_layer(connected_layer l, network_state state)
int k = l.batch;
int n = l.inputs;
float *a = l.delta;
float *b = state.input;
float *b = net.input;
float *c = l.weight_updates;
gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);
@@ -191,7 +191,7 @@ void backward_connected_layer(connected_layer l, network_state state)
a = l.delta;
b = l.weights;
c = state.delta;
c = net.delta;
if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
@@ -274,7 +274,7 @@ void update_connected_layer_gpu(connected_layer l, int batch, float learning_rat
scal_ongpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1);
}
void forward_connected_layer_gpu(connected_layer l, network_state state)
void forward_connected_layer_gpu(connected_layer l, network net)
{
int i;
fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1);
@@ -282,12 +282,12 @@ void forward_connected_layer_gpu(connected_layer l, network_state state)
int m = l.batch;
int k = l.inputs;
int n = l.outputs;
float * a = state.input;
float * a = net.input_gpu;
float * b = l.weights_gpu;
float * c = l.output_gpu;
gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
if(l.batch_normalize){
forward_batchnorm_layer_gpu(l, state);
forward_batchnorm_layer_gpu(l, net);
}
for(i = 0; i < l.batch; ++i){
axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1);
@@ -295,7 +295,7 @@ void forward_connected_layer_gpu(connected_layer l, network_state state)
activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
void backward_connected_layer_gpu(connected_layer l, network_state state)
void backward_connected_layer_gpu(connected_layer l, network net)
{
int i;
constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
@@ -305,14 +305,14 @@ void backward_connected_layer_gpu(connected_layer l, network_state state)
}
if(l.batch_normalize){
backward_batchnorm_layer_gpu(l, state);
backward_batchnorm_layer_gpu(l, net);
}
int m = l.outputs;
int k = l.batch;
int n = l.inputs;
float * a = l.delta_gpu;
float * b = state.input;
float * b = net.input_gpu;
float * c = l.weight_updates_gpu;
gemm_ongpu(1,0,m,n,k,1,a,m,b,n,1,c,n);
@@ -322,7 +322,7 @@ void backward_connected_layer_gpu(connected_layer l, network_state state)
a = l.delta_gpu;
b = l.weights_gpu;
c = state.delta;
c = net.delta_gpu;
if(c) gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
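For readers tracking the GEMM shapes through the rename (the shapes themselves are unchanged), a hypothetical annotation of the forward call, assuming darknet's row-major convention:

/* a: net.input    [batch   x inputs ]
 * b: l.weights    [outputs x inputs ]   (the '1' flag transposes b)
 * c: l.output     [batch   x outputs]
 */
gemm(0,1, m /*batch*/, n /*outputs*/, k /*inputs*/, 1, a, k, b, k, 1, c, n);

so the two backward products accumulate delta^T x input into weight_updates and propagate delta x weights into net.delta when a previous layer exists.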

View File

@@ -9,15 +9,15 @@ typedef layer connected_layer;
connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize);
void forward_connected_layer(connected_layer layer, network_state state);
void backward_connected_layer(connected_layer layer, network_state state);
void forward_connected_layer(connected_layer layer, network net);
void backward_connected_layer(connected_layer layer, network net);
void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay);
void denormalize_connected_layer(layer l);
void statistics_connected_layer(layer l);
#ifdef GPU
void forward_connected_layer_gpu(connected_layer layer, network_state state);
void backward_connected_layer_gpu(connected_layer layer, network_state state);
void forward_connected_layer_gpu(connected_layer layer, network net);
void backward_connected_layer_gpu(connected_layer layer, network net);
void update_connected_layer_gpu(connected_layer layer, int batch, float learning_rate, float momentum, float decay);
void push_connected_layer(connected_layer layer);
void pull_connected_layer(connected_layer layer);

View File

@@ -70,7 +70,7 @@ void binarize_weights_gpu(float *weights, int n, int size, float *binary)
check_error(cudaPeekAtLastError());
}
void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
void forward_convolutional_layer_gpu(convolutional_layer l, network net)
{
fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1);
if(l.binary){
@@ -81,8 +81,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
if(l.xnor){
binarize_weights_gpu(l.weights_gpu, l.n, l.c*l.size*l.size, l.binary_weights_gpu);
swap_binary(&l);
binarize_gpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input_gpu);
state.input = l.binary_input_gpu;
binarize_gpu(net.input_gpu, l.c*l.h*l.w*l.batch, l.binary_input_gpu);
net.input_gpu = l.binary_input_gpu;
}
#ifdef CUDNN
@@ -90,12 +90,12 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
cudnnConvolutionForward(cudnn_handle(),
&one,
l.srcTensorDesc,
state.input,
net.input_gpu,
l.weightDesc,
l.weights_gpu,
l.convDesc,
l.fw_algo,
state.workspace,
net.workspace,
l.workspace_size,
&one,
l.dstTensorDesc,
@@ -107,16 +107,16 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
int k = l.size*l.size*l.c;
int n = l.out_w*l.out_h;
for(i = 0; i < l.batch; ++i){
im2col_ongpu(state.input + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, state.workspace);
im2col_ongpu(net.input_gpu + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace);
float * a = l.weights_gpu;
float * b = state.workspace;
float * b = net.workspace;
float * c = l.output_gpu;
gemm_ongpu(0,0,m,n,k,1.,a,k,b,n,1.,c+i*m*n,n);
}
#endif
if (l.batch_normalize) {
forward_batchnorm_layer_gpu(l, state);
forward_batchnorm_layer_gpu(l, net);
} else {
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
}
@@ -168,40 +168,40 @@ extern "C" void smooth_layer(layer l, int size, float rate)
check_error(cudaPeekAtLastError());
}
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
void backward_convolutional_layer_gpu(convolutional_layer l, network net)
{
if(l.smooth){
smooth_layer(l, 5, l.smooth);
}
//constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
if(l.batch_normalize){
backward_batchnorm_layer_gpu(l, state);
backward_batchnorm_layer_gpu(l, net);
} else {
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
}
float *original_input = state.input;
float *original_input = net.input_gpu;
if(l.xnor) state.input = l.binary_input_gpu;
if(l.xnor) net.input_gpu = l.binary_input_gpu;
#ifdef CUDNN
float one = 1;
cudnnConvolutionBackwardFilter(cudnn_handle(),
&one,
l.srcTensorDesc,
state.input,
net.input_gpu,
l.ddstTensorDesc,
l.delta_gpu,
l.convDesc,
l.bf_algo,
state.workspace,
net.workspace,
l.workspace_size,
&one,
l.dweightDesc,
l.weight_updates_gpu);
if(state.delta){
if(net.delta_gpu){
if(l.binary || l.xnor) swap_binary(&l);
cudnnConvolutionBackwardData(cudnn_handle(),
&one,
@@ -211,13 +211,13 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
l.delta_gpu,
l.convDesc,
l.bd_algo,
state.workspace,
net.workspace,
l.workspace_size,
&one,
l.dsrcTensorDesc,
state.delta);
net.delta_gpu);
if(l.binary || l.xnor) swap_binary(&l);
if(l.xnor) gradient_array_ongpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, state.delta);
if(l.xnor) gradient_array_ongpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, net.delta_gpu);
}
#else
@@ -228,25 +228,25 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
int i;
for(i = 0; i < l.batch; ++i){
float * a = l.delta_gpu;
float * b = state.workspace;
float * b = net.workspace;
float * c = l.weight_updates_gpu;
im2col_ongpu(state.input + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, state.workspace);
im2col_ongpu(net.input_gpu + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace);
gemm_ongpu(0,1,m,n,k,1,a + i*m*k,k,b,k,1,c,n);
if(state.delta){
if(net.delta_gpu){
if(l.binary || l.xnor) swap_binary(&l);
float * a = l.weights_gpu;
float * b = l.delta_gpu;
float * c = state.workspace;
float * c = net.workspace;
gemm_ongpu(1,0,n,k,m,1,a,n,b + i*k*m,k,0,c,k);
col2im_ongpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta + i*l.c*l.h*l.w);
col2im_ongpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu + i*l.c*l.h*l.w);
if(l.binary || l.xnor) {
swap_binary(&l);
}
if(l.xnor) gradient_array_ongpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, state.delta + i*l.c*l.h*l.w);
if(l.xnor) gradient_array_ongpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, net.delta_gpu + i*l.c*l.h*l.w);
}
}
#endif
@@ -286,33 +286,42 @@ void push_convolutional_layer(convolutional_layer layer)
}
}
void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay)
void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch)
{
int size = layer.size*layer.size*layer.c*layer.n;
axpy_ongpu(layer.n, learning_rate/batch, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1);
scal_ongpu(n, B1, m, 1);
scal_ongpu(n, B2, v, 1);
axpy_ongpu(n, -decay*batch, w, 1, d, 1);
if(layer.scales_gpu){
axpy_ongpu(layer.n, learning_rate/batch, layer.scale_updates_gpu, 1, layer.scales_gpu, 1);
scal_ongpu(layer.n, momentum, layer.scale_updates_gpu, 1);
}
axpy_ongpu(n, (1-B1), d, 1, m, 1);
mul_ongpu(n, d, 1, d, 1);
axpy_ongpu(n, (1-B2), d, 1, v, 1);
if(layer.adam){
scal_ongpu(size, layer.B1, layer.m_gpu, 1);
scal_ongpu(size, layer.B2, layer.v_gpu, 1);
adam_gpu(n, w, m, v, B1, B2, rate/batch, eps, 1000);
fill_ongpu(n, 0, d, 1);
}
axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
void update_convolutional_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay)
{
int size = l.size*l.size*l.c*l.n;
axpy_ongpu(size, -(1-layer.B1), layer.weight_updates_gpu, 1, layer.m_gpu, 1);
mul_ongpu(size, layer.weight_updates_gpu, 1, layer.weight_updates_gpu, 1);
axpy_ongpu(size, (1-layer.B2), layer.weight_updates_gpu, 1, layer.v_gpu, 1);
adam_gpu(size, layer.weights_gpu, layer.m_gpu, layer.v_gpu, layer.B1, layer.B2, learning_rate/batch, layer.eps, layer.t+1);
fill_ongpu(size, 0, layer.weight_updates_gpu, 1);
if(l.adam){
adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, size, batch);
adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch);
if(l.scales_gpu){
adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch);
}
}else{
axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
scal_ongpu(size, momentum, layer.weight_updates_gpu, 1);
axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
axpy_ongpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
scal_ongpu(size, momentum, l.weight_updates_gpu, 1);
axpy_ongpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
scal_ongpu(l.n, momentum, l.bias_updates_gpu, 1);
if(l.scales_gpu){
axpy_ongpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
scal_ongpu(l.n, momentum, l.scale_updates_gpu, 1);
}
}
}
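update_convolutional_layer_gpu is rebuilt around a shared adam_update_gpu helper, now also applied to biases and batch-norm scales with their own moment buffers (the new bias_m/bias_v/scale_m/scale_v arrays allocated above). Reading the calls, the helper implements the standard Adam recurrences with L2 decay folded into the accumulated gradient d:

d = d - decay*batch*w
m = B1*m + (1 - B1)*d
v = B2*v + (1 - B2)*d^2
w = w + (rate/batch) * m_hat/(sqrt(v_hat) + eps),  then d is zeroed

where m_hat and v_hat are the bias-corrected moments computed inside adam_gpu. Note the hard-coded timestep 1000 passed to adam_gpu, which makes the correction factors effectively 1.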

View File

@@ -12,11 +12,6 @@
#include "xnor_layer.h"
#endif
#ifndef AI2
#define AI2 0
void forward_xnor_layer(layer l, network_state state);
#endif
void swap_binary(convolutional_layer *l)
{
float *swap = l->weights;
@@ -188,9 +183,14 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
l.biases = calloc(n, sizeof(float));
l.bias_updates = calloc(n, sizeof(float));
l.nweights = c*n*size*size;
l.nbiases = n;
// float scale = 1./sqrt(size*size*c);
float scale = sqrt(2./(size*size*c));
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
scale = .02;
//for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
int out_w = convolutional_out_width(l);
int out_h = convolutional_out_height(l);
l.out_h = out_h;
@@ -237,6 +237,10 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
l.adam = 1;
l.m = calloc(c*n*size*size, sizeof(float));
l.v = calloc(c*n*size*size, sizeof(float));
l.bias_m = calloc(n, sizeof(float));
l.scale_m = calloc(n, sizeof(float));
l.bias_v = calloc(n, sizeof(float));
l.scale_v = calloc(n, sizeof(float));
}
#ifdef GPU
@@ -248,6 +252,10 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
if (adam) {
l.m_gpu = cuda_make_array(l.m, c*n*size*size);
l.v_gpu = cuda_make_array(l.v, c*n*size*size);
l.bias_m_gpu = cuda_make_array(l.bias_m, n);
l.bias_v_gpu = cuda_make_array(l.bias_v, n);
l.scale_m_gpu = cuda_make_array(l.scale_m, n);
l.scale_v_gpu = cuda_make_array(l.scale_v, n);
}
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
@@ -319,6 +327,7 @@ void denormalize_convolutional_layer(convolutional_layer l)
}
}
/*
void test_convolutional_layer()
{
convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0);
@@ -338,10 +347,10 @@ void test_convolutional_layer()
3,3,3,3,3,
3,3,3,3,3,
3,3,3,3,3};
network_state state = {0};
state.input = data;
forward_convolutional_layer(l, state);
//net.input = data;
//forward_convolutional_layer(l);
}
*/
void resize_convolutional_layer(convolutional_layer *l, int w, int h)
{
@@ -418,7 +427,7 @@ void backward_bias(float *bias_updates, float *delta, int batch, int n, int size
}
}
void forward_convolutional_layer(convolutional_layer l, network_state state)
void forward_convolutional_layer(convolutional_layer l, network net)
{
int out_h = l.out_h;
int out_w = l.out_w;
@@ -429,8 +438,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
if(l.xnor){
binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights);
swap_binary(&l);
binarize_cpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input);
state.input = l.binary_input;
binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
net.input = l.binary_input;
}
int m = l.n;
@@ -439,19 +448,19 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
float *a = l.weights;
float *b = state.workspace;
float *b = net.workspace;
float *c = l.output;
for(i = 0; i < l.batch; ++i){
im2col_cpu(state.input, l.c, l.h, l.w,
im2col_cpu(net.input, l.c, l.h, l.w,
l.size, l.stride, l.pad, b);
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
c += n*m;
state.input += l.c*l.h*l.w;
net.input += l.c*l.h*l.w;
}
if(l.batch_normalize){
forward_batchnorm_layer(l, state);
forward_batchnorm_layer(l, net);
} else {
add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
}
@@ -460,7 +469,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
if(l.binary || l.xnor) swap_binary(&l);
}
void backward_convolutional_layer(convolutional_layer l, network_state state)
void backward_convolutional_layer(convolutional_layer l, network net)
{
int i;
int m = l.n;
@@ -470,30 +479,30 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
gradient_array(l.output, m*k*l.batch, l.activation, l.delta);
if(l.batch_normalize){
backward_batchnorm_layer(l, state);
backward_batchnorm_layer(l, net);
} else {
backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
}
for(i = 0; i < l.batch; ++i){
float *a = l.delta + i*m*k;
float *b = state.workspace;
float *b = net.workspace;
float *c = l.weight_updates;
float *im = state.input+i*l.c*l.h*l.w;
float *im = net.input+i*l.c*l.h*l.w;
im2col_cpu(im, l.c, l.h, l.w,
l.size, l.stride, l.pad, b);
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
if(state.delta){
if(net.delta){
a = l.weights;
b = l.delta + i*m*k;
c = state.workspace;
c = net.workspace;
gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);
col2im_cpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w);
}
}
}
@@ -553,8 +562,14 @@ image *get_weights(convolutional_layer l)
int i;
for(i = 0; i < l.n; ++i){
weights[i] = copy_image(get_convolutional_weight(l, i));
//normalize_image(weights[i]);
normalize_image(weights[i]);
/*
char buff[256];
sprintf(buff, "filter%d", i);
save_image(weights[i], buff);
*/
}
//error("hey");
return weights;
}

View File

@@ -10,8 +10,8 @@
typedef layer convolutional_layer;
#ifdef GPU
void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state);
void forward_convolutional_layer_gpu(convolutional_layer layer, network net);
void backward_convolutional_layer_gpu(convolutional_layer layer, network net);
void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay);
void push_convolutional_layer(convolutional_layer layer);
@@ -19,6 +19,7 @@ void pull_convolutional_layer(convolutional_layer layer);
void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch);
#ifdef CUDNN
void cudnn_convolutional_setup(layer *l);
#endif
@@ -27,18 +28,19 @@ void cudnn_convolutional_setup(layer *l);
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam);
void denormalize_convolutional_layer(convolutional_layer l);
void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
void forward_convolutional_layer(const convolutional_layer layer, network_state state);
void forward_convolutional_layer(const convolutional_layer layer, network net);
void update_convolutional_layer(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay);
image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights);
void binarize_weights(float *weights, int n, int size, float *binary);
void swap_binary(convolutional_layer *l);
void binarize_weights2(float *weights, int n, int size, char *binary, float *scales);
void backward_convolutional_layer(convolutional_layer layer, network_state state);
void backward_convolutional_layer(convolutional_layer layer, network net);
void add_bias(float *output, float *biases, int batch, int n, int size);
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
image *get_weights(convolutional_layer l);
image get_convolutional_image(convolutional_layer layer);
image get_convolutional_delta(convolutional_layer layer);
image get_convolutional_weight(convolutional_layer layer, int i);

View File

@@ -73,28 +73,28 @@ void resize_cost_layer(cost_layer *l, int inputs)
#endif
}
void forward_cost_layer(cost_layer l, network_state state)
void forward_cost_layer(cost_layer l, network net)
{
if (!state.truth) return;
if (!net.truth) return;
if(l.cost_type == MASKED){
int i;
for(i = 0; i < l.batch*l.inputs; ++i){
if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM;
if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM;
}
}
if(l.cost_type == SMOOTH){
smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
}else if(l.cost_type == L1){
l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
} else {
l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
}
l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}
void backward_cost_layer(const cost_layer l, network_state state)
void backward_cost_layer(const cost_layer l, network net)
{
axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1);
axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1);
}
#ifdef GPU
@@ -118,23 +118,23 @@ int float_abs_compare (const void * a, const void * b)
return (fa > fb) - (fa < fb);
}
void forward_cost_layer_gpu(cost_layer l, network_state state)
void forward_cost_layer_gpu(cost_layer l, network net)
{
if (!state.truth) return;
if (!net.truth) return;
if(l.smooth){
scal_ongpu(l.batch*l.inputs, (1-l.smooth), state.truth, 1);
add_ongpu(l.batch*l.inputs, l.smooth * 1./l.inputs, state.truth, 1);
scal_ongpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1);
add_ongpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1);
}
if (l.cost_type == MASKED) {
mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth);
mask_ongpu(l.batch*l.inputs, net.input_gpu, SECRET_NUM, net.truth_gpu);
}
if(l.cost_type == SMOOTH){
smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
} else if (l.cost_type == L1){
l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
} else {
l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
}
if(l.ratio){
@@ -155,9 +155,9 @@ void forward_cost_layer_gpu(cost_layer l, network_state state)
l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}
void backward_cost_layer_gpu(const cost_layer l, network_state state)
void backward_cost_layer_gpu(const cost_layer l, network net)
{
axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1);
axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
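Context for the l.smooth branch in forward_cost_layer_gpu (now reading net.truth_gpu): it is label smoothing, truth = (1 - s)*truth + s/inputs. For example, with s = 0.1 over K = inputs classes, a one-hot 1 becomes 0.9 + 0.1/K and every 0 becomes 0.1/K, which softens the regression target without changing its argmax.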

View File

@@ -8,13 +8,13 @@ typedef layer cost_layer;
COST_TYPE get_cost_type(char *s);
char *get_cost_string(COST_TYPE a);
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
void forward_cost_layer(const cost_layer l, network_state state);
void backward_cost_layer(const cost_layer l, network_state state);
void forward_cost_layer(const cost_layer l, network net);
void backward_cost_layer(const cost_layer l, network net);
void resize_cost_layer(cost_layer *l, int inputs);
#ifdef GPU
void forward_cost_layer_gpu(cost_layer l, network_state state);
void backward_cost_layer_gpu(const cost_layer l, network_state state);
void forward_cost_layer_gpu(cost_layer l, network net);
void backward_cost_layer_gpu(const cost_layer l, network net);
#endif
#endif

View File

@@ -88,10 +88,10 @@ void update_crnn_layer(layer l, int batch, float learning_rate, float momentum,
update_convolutional_layer(*(l.output_layer), batch, learning_rate, momentum, decay);
}
void forward_crnn_layer(layer l, network_state state)
void forward_crnn_layer(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_layer = *(l.input_layer);
layer self_layer = *(l.self_layer);
@@ -100,17 +100,17 @@ void forward_crnn_layer(layer l, network_state state)
fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
if(state.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
for (i = 0; i < l.steps; ++i) {
s.input = state.input;
s.input = net.input;
forward_convolutional_layer(input_layer, s);
s.input = l.state;
forward_convolutional_layer(self_layer, s);
float *old_state = l.state;
if(state.train) l.state += l.hidden*l.batch;
if(net.train) l.state += l.hidden*l.batch;
if(l.shortcut){
copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1);
}else{
@@ -122,17 +122,16 @@ void forward_crnn_layer(layer l, network_state state)
s.input = l.state;
forward_convolutional_layer(output_layer, s);
state.input += l.inputs*l.batch;
net.input += l.inputs*l.batch;
increment_layer(&input_layer, 1);
increment_layer(&self_layer, 1);
increment_layer(&output_layer, 1);
}
}
void backward_crnn_layer(layer l, network_state state)
void backward_crnn_layer(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
int i;
layer input_layer = *(l.input_layer);
layer self_layer = *(l.self_layer);
@@ -168,8 +167,8 @@ void backward_crnn_layer(layer l, network_state state)
copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1);
if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1);
s.input = state.input + i*l.inputs*l.batch;
if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
s.input = net.input + i*l.inputs*l.batch;
if(net.delta) s.delta = net.delta + i*l.inputs*l.batch;
else s.delta = 0;
backward_convolutional_layer(input_layer, s);
@@ -202,10 +201,9 @@ void update_crnn_layer_gpu(layer l, int batch, float learning_rate, float moment
update_convolutional_layer_gpu(*(l.output_layer), batch, learning_rate, momentum, decay);
}
void forward_crnn_layer_gpu(layer l, network_state state)
void forward_crnn_layer_gpu(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
int i;
layer input_layer = *(l.input_layer);
layer self_layer = *(l.self_layer);
@@ -214,17 +212,17 @@ void forward_crnn_layer_gpu(layer l, network_state state)
fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1);
fill_ongpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1);
fill_ongpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1);
if(state.train) fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
if(net.train) fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
for (i = 0; i < l.steps; ++i) {
s.input = state.input;
s.input_gpu = net.input_gpu;
forward_convolutional_layer_gpu(input_layer, s);
s.input = l.state_gpu;
s.input_gpu = l.state_gpu;
forward_convolutional_layer_gpu(self_layer, s);
float *old_state = l.state_gpu;
if(state.train) l.state_gpu += l.hidden*l.batch;
if(net.train) l.state_gpu += l.hidden*l.batch;
if(l.shortcut){
copy_ongpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1);
}else{
@@ -233,20 +231,20 @@ void forward_crnn_layer_gpu(layer l, network_state state)
axpy_ongpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1);
axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
s.input = l.state_gpu;
s.input_gpu = l.state_gpu;
forward_convolutional_layer_gpu(output_layer, s);
state.input += l.inputs*l.batch;
net.input_gpu += l.inputs*l.batch;
increment_layer(&input_layer, 1);
increment_layer(&self_layer, 1);
increment_layer(&output_layer, 1);
}
}
void backward_crnn_layer_gpu(layer l, network_state state)
void backward_crnn_layer_gpu(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_layer = *(l.input_layer);
layer self_layer = *(l.self_layer);
@@ -259,22 +257,22 @@ void backward_crnn_layer_gpu(layer l, network_state state)
copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
s.input = l.state_gpu;
s.delta = self_layer.delta_gpu;
s.input_gpu = l.state_gpu;
s.delta_gpu = self_layer.delta_gpu;
backward_convolutional_layer_gpu(output_layer, s);
l.state_gpu -= l.hidden*l.batch;
s.input = l.state_gpu;
s.delta = self_layer.delta_gpu - l.hidden*l.batch;
if (i == 0) s.delta = 0;
s.input_gpu = l.state_gpu;
s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch;
if (i == 0) s.delta_gpu = 0;
backward_convolutional_layer_gpu(self_layer, s);
copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
s.input = state.input + i*l.inputs*l.batch;
if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
else s.delta = 0;
s.input_gpu = net.input_gpu + i*l.inputs*l.batch;
if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch;
else s.delta_gpu = 0;
backward_convolutional_layer_gpu(input_layer, s);
increment_layer(&input_layer, -1);

View File

@@ -8,13 +8,13 @@
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize);
void forward_crnn_layer(layer l, network_state state);
void backward_crnn_layer(layer l, network_state state);
void forward_crnn_layer(layer l, network net);
void backward_crnn_layer(layer l, network net);
void update_crnn_layer(layer l, int batch, float learning_rate, float momentum, float decay);
#ifdef GPU
void forward_crnn_layer_gpu(layer l, network_state state);
void backward_crnn_layer_gpu(layer l, network_state state);
void forward_crnn_layer_gpu(layer l, network net);
void backward_crnn_layer_gpu(layer l, network net);
void update_crnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_crnn_layer(layer l);
void pull_crnn_layer(layer l);

View File

@@ -10,8 +10,8 @@ image get_crop_image(crop_layer l)
return float_to_image(w,h,c,l.output);
}
void backward_crop_layer(const crop_layer l, network_state state){}
void backward_crop_layer_gpu(const crop_layer l, network_state state){}
void backward_crop_layer(const crop_layer l, network net){}
void backward_crop_layer_gpu(const crop_layer l, network net){}
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
{
@@ -64,7 +64,7 @@ void resize_crop_layer(layer *l, int w, int h)
}
void forward_crop_layer(const crop_layer l, network_state state)
void forward_crop_layer(const crop_layer l, network net)
{
int i,j,c,b,row,col;
int index;
@@ -78,7 +78,7 @@ void forward_crop_layer(const crop_layer l, network_state state)
scale = 1;
trans = 0;
}
if(!state.train){
if(!net.train){
flip = 0;
dh = (l.h - l.out_h)/2;
dw = (l.w - l.out_w)/2;
@@ -94,7 +94,7 @@ void forward_crop_layer(const crop_layer l, network_state state)
}
row = i + dh;
index = col+l.w*(row+l.h*(c + l.c*b));
l.output[count++] = state.input[index]*scale + trans;
l.output[count++] = net.input[index]*scale + trans;
}
}
}

View File

@@ -9,11 +9,11 @@ typedef layer crop_layer;
image get_crop_image(crop_layer l);
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
void forward_crop_layer(const crop_layer l, network_state state);
void forward_crop_layer(const crop_layer l, network net);
void resize_crop_layer(layer *l, int w, int h);
#ifdef GPU
void forward_crop_layer_gpu(crop_layer l, network_state state);
void forward_crop_layer_gpu(crop_layer l, network net);
#endif
#endif

View File

@@ -180,7 +180,7 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i
output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k);
}
extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
extern "C" void forward_crop_layer_gpu(crop_layer layer, network net)
{
cuda_random(layer.rand_gpu, layer.batch*8);
@@ -195,12 +195,12 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
int size = layer.batch * layer.w * layer.h;
levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, layer.batch, layer.w, layer.h, state.train, layer.saturation, layer.exposure, translate, scale, layer.shift);
levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift);
check_error(cudaPeekAtLastError());
size = layer.batch*layer.c*layer.out_w*layer.out_h;
forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, state.train, layer.flip, radians, layer.output_gpu);
forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu);
check_error(cudaPeekAtLastError());
/*

View File

@@ -348,6 +348,32 @@ void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
save_weights(net, outfile);
}
void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix)
{
network net = load_network(cfgfile, weightfile, 0);
image *ims = get_weights(net.layers[0]);
int n = net.layers[0].n;
int z;
for(z = 0; z < num; ++z){
image im = make_image(h, w, 3);
fill_image(im, .5);
int i;
for(i = 0; i < 100; ++i){
image r = copy_image(ims[rand()%n]);
rotate_image_cw(r, rand()%4);
random_distort_image(r, 1, 1.5, 1.5);
int dx = rand()%(w-r.w);
int dy = rand()%(h-r.h);
ghost_image(r, im, dx, dy);
free_image(r);
}
char buff[256];
sprintf(buff, "%s/gen_%d", prefix, z);
save_image(im, buff);
free_image(im);
}
}
void visualize(char *cfgfile, char *weightfile)
{
network net = parse_network_cfg(cfgfile);
@@ -458,6 +484,8 @@ int main(int argc, char **argv)
average(argc, argv);
} else if (0 == strcmp(argv[1], "visualize")){
visualize(argv[2], (argc > 3) ? argv[3] : 0);
} else if (0 == strcmp(argv[1], "mkimg")){
mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
} else if (0 == strcmp(argv[1], "imtest")){
test_resize(argv[2]);
} else {
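The new mkimg command collages randomly rotated, distorted copies of the first layer's filters onto gray canvases and saves num generated images. Judging from the argv dispatch above, a hypothetical invocation (file names are placeholders):

./darknet mkimg cfg/yolo.cfg yolo.weights 256 256 10 gen_out

where 256 256 are the canvas height and width, 10 is the image count, and gen_out is the prefix used in the gen_%d output filenames.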

View File

@@ -14,12 +14,9 @@ extern "C" {
#include "cuda.h"
}
extern "C" void forward_deconvolutional_layer_gpu(layer l, network_state state)
extern "C" void forward_deconvolutional_layer_gpu(layer l, network net)
{
int i;
int out_h = l.out_h;
int out_w = l.out_w;
int size = out_h*out_w;
int m = l.size*l.size*l.n;
int n = l.h*l.w;
@@ -29,59 +26,57 @@ extern "C" void forward_deconvolutional_layer_gpu(layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *a = l.weights_gpu;
float *b = state.input + i*l.c*l.h*l.w;
float *c = state.workspace;
float *b = net.input_gpu + i*l.c*l.h*l.w;
float *c = net.workspace;
gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
col2im_ongpu(c, l.n, out_h, out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.n*size);
col2im_ongpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs);
}
if (l.batch_normalize) {
forward_batchnorm_layer_gpu(l, state);
forward_batchnorm_layer_gpu(l, net);
} else {
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
}
activate_array_ongpu(l.output_gpu, l.batch*l.n*size, l.activation);
activate_array_ongpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation);
}
extern "C" void backward_deconvolutional_layer_gpu(layer l, network_state state)
extern "C" void backward_deconvolutional_layer_gpu(layer l, network net)
{
int out_h = l.out_h;
int out_w = l.out_w;
int size = out_h*out_w;
int i;
constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
if(l.batch_normalize){
backward_batchnorm_layer_gpu(l, state);
backward_batchnorm_layer_gpu(l, net);
} else {
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
}
//if(state.delta) memset(state.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));
//if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float));
for(i = 0; i < l.batch; ++i){
int m = l.c;
int n = l.size*l.size*l.n;
int k = l.h*l.w;
float *a = state.input + i*m*n;
float *b = state.workspace;
float *a = net.input_gpu + i*m*k;
float *b = net.workspace;
float *c = l.weight_updates_gpu;
im2col_ongpu(l.delta_gpu + i*l.n*size, l.n, out_h, out_w,
im2col_ongpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, l.out_w,
l.size, l.stride, l.pad, b);
gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
if(state.delta){
if(net.delta_gpu){
int m = l.c;
int n = l.h*l.w;
int k = l.size*l.size*l.n;
float *a = l.weights_gpu;
float *b = state.workspace;
float *c = state.delta + i*n*m;
float *b = net.workspace;
float *c = net.delta_gpu + i*n*m;
gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
@@ -117,16 +112,25 @@ extern "C" void push_deconvolutional_layer(layer l)
void update_deconvolutional_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay)
{
int size = l.size*l.size*l.c*l.n;
axpy_ongpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
scal_ongpu(l.n, momentum, l.bias_updates_gpu, 1);
if(l.scales_gpu){
axpy_ongpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
scal_ongpu(l.n, momentum, l.scale_updates_gpu, 1);
if(l.adam){
adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, size, batch);
adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch);
if(l.scales_gpu){
adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch);
}
}else{
axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
axpy_ongpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
scal_ongpu(size, momentum, l.weight_updates_gpu, 1);
axpy_ongpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
scal_ongpu(l.n, momentum, l.bias_updates_gpu, 1);
if(l.scales_gpu){
axpy_ongpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
scal_ongpu(l.n, momentum, l.scale_updates_gpu, 1);
}
}
axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
axpy_ongpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
scal_ongpu(size, momentum, l.weight_updates_gpu, 1);
}

View File

@@ -11,20 +11,11 @@
static size_t get_workspace_size(layer l){
return (size_t)l.h*l.w*l.size*l.size*l.c*sizeof(float);
return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float);
}
int deconvolutional_out_height(layer l)
{
return (l.h) * l.stride + l.size/2 - l.pad;
}
int deconvolutional_out_width(layer l)
{
return (l.w) * l.stride + l.size/2 - l.pad;
}
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, int batch_normalize)
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam)
{
int i;
layer l = {0};
@@ -38,26 +29,29 @@ layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size
l.stride = stride;
l.size = size;
l.nweights = c*n*size*size;
l.nbiases = n;
l.weights = calloc(c*n*size*size, sizeof(float));
l.weight_updates = calloc(c*n*size*size, sizeof(float));
l.biases = calloc(n, sizeof(float));
l.bias_updates = calloc(n, sizeof(float));
float scale = 1./sqrt(size*size*c);
float scale = .02;
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
for(i = 0; i < n; ++i){
l.biases[i] = scale;
l.biases[i] = 0;
}
l.pad = l.size/2;
l.pad = padding;
l.out_h = (l.h) * l.stride + l.size/2 - l.pad;
l.out_w = (l.w) * l.stride + l.size/2 - l.pad;
l.out_h = (l.h - 1) * l.stride + l.size - 2*l.pad;
l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad;
l.out_c = n;
l.outputs = l.out_w * l.out_h * l.out_c;
l.inputs = l.w * l.h * l.c;
l.output = calloc(l.batch*l.out_h * l.out_w * n, sizeof(float));
l.delta = calloc(l.batch*l.out_h * l.out_w * n, sizeof(float));
l.output = calloc(l.batch*l.outputs, sizeof(float));
l.delta = calloc(l.batch*l.outputs, sizeof(float));
l.forward = forward_deconvolutional_layer;
l.backward = backward_deconvolutional_layer;
@@ -83,6 +77,15 @@ layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size
l.x = calloc(l.batch*l.outputs, sizeof(float));
l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
}
if(adam){
l.adam = 1;
l.m = calloc(c*n*size*size, sizeof(float));
l.v = calloc(c*n*size*size, sizeof(float));
l.bias_m = calloc(n, sizeof(float));
l.scale_m = calloc(n, sizeof(float));
l.bias_v = calloc(n, sizeof(float));
l.scale_v = calloc(n, sizeof(float));
}
#ifdef GPU
l.forward_gpu = forward_deconvolutional_layer_gpu;
@@ -91,6 +94,14 @@ layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size
if(gpu_index >= 0){
if (adam) {
l.m_gpu = cuda_make_array(l.m, c*n*size*size);
l.v_gpu = cuda_make_array(l.v, c*n*size*size);
l.bias_m_gpu = cuda_make_array(l.bias_m, n);
l.bias_v_gpu = cuda_make_array(l.bias_v, n);
l.scale_m_gpu = cuda_make_array(l.scale_m, n);
l.scale_v_gpu = cuda_make_array(l.scale_v, n);
}
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
@@ -137,8 +148,8 @@ void resize_deconvolutional_layer(layer *l, int h, int w)
{
l->h = h;
l->w = w;
l->out_h = (l->h) * l->stride + l->size/2 - l->pad;
l->out_w = (l->w) * l->stride + l->size/2 - l->pad;
l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad;
l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad;
l->outputs = l->out_h * l->out_w * l->out_c;
l->inputs = l->w * l->h * l->c;
@@ -172,12 +183,9 @@ void resize_deconvolutional_layer(layer *l, int h, int w)
l->workspace_size = get_workspace_size(*l);
}
void forward_deconvolutional_layer(const layer l, network_state state)
void forward_deconvolutional_layer(const layer l, network net)
{
int i;
int out_h = l.out_h;
int out_w = l.out_w;
int size = out_h*out_w;
int m = l.size*l.size*l.n;
int n = l.h*l.w;
@@ -187,60 +195,58 @@ void forward_deconvolutional_layer(const layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *a = l.weights;
float *b = state.input + i*l.c*l.h*l.w;
float *c = state.workspace;
float *b = net.input + i*l.c*l.h*l.w;
float *c = net.workspace;
gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);
gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, l.output+i*l.n*size);
col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs);
}
if(l.batch_normalize){
forward_batchnorm_layer(l, state);
if (l.batch_normalize) {
forward_batchnorm_layer(l, net);
} else {
add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h);
}
activate_array(l.output, l.batch*l.n*size, l.activation);
activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation);
}
void backward_deconvolutional_layer(layer l, network_state state)
void backward_deconvolutional_layer(layer l, network net)
{
float alpha = 1./l.batch;
int out_h = deconvolutional_out_height(l);
int out_w = deconvolutional_out_width(l);
int size = out_h*out_w;
int i;
gradient_array(l.output, size*l.n*l.batch, l.activation, l.delta);
gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
if(l.batch_normalize){
backward_batchnorm_layer(l, state);
backward_batchnorm_layer(l, net);
} else {
backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
}
//if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));
for(i = 0; i < l.batch; ++i){
int m = l.c;
int n = l.size*l.size*l.n;
int k = l.h*l.w;
float *a = state.input + i*m*n;
float *b = state.workspace;
float *a = net.input + i*m*k;
float *b = net.workspace;
float *c = l.weight_updates;
im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
l.size, l.stride, 0, b);
gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w,
l.size, l.stride, l.pad, b);
gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
if(state.delta){
if(net.delta){
int m = l.c;
int n = l.h*l.w;
int k = l.size*l.size*l.n;
float *a = l.weights;
float *b = state.workspace;
float *c = state.delta + i*n*m;
float *b = net.workspace;
float *c = net.delta + i*n*m;
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
}
}
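The geometry change in this file replaces the ad-hoc output size with the standard transposed-convolution formula: out = (in - 1)*stride + size - 2*pad, which exactly inverts the convolution relation in = (out + 2*pad - size)/stride + 1. Worked example: h = 13, stride = 2, size = 4, pad = 1 gives out_h = 12*2 + 4 - 2 = 26, a clean 2x upsample, where the old h*stride + size/2 - pad formula gave 27.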

View File

@@ -8,18 +8,18 @@
#include "network.h"
#ifdef GPU
void forward_deconvolutional_layer_gpu(layer l, network_state state);
void backward_deconvolutional_layer_gpu(layer l, network_state state);
void forward_deconvolutional_layer_gpu(layer l, network net);
void backward_deconvolutional_layer_gpu(layer l, network net);
void update_deconvolutional_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_deconvolutional_layer(layer l);
void pull_deconvolutional_layer(layer l);
#endif
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, int batch_normalize);
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam);
void resize_deconvolutional_layer(layer *l, int h, int w);
void forward_deconvolutional_layer(const layer l, network_state state);
void forward_deconvolutional_layer(const layer l, network net);
void update_deconvolutional_layer(layer l, int batch, float learning_rate, float momentum, float decay);
void backward_deconvolutional_layer(layer l, network_state state);
void backward_deconvolutional_layer(layer l, network net);
#endif

View File

@@ -41,7 +41,7 @@ void *fetch_in_thread(void *ptr)
if(!in.data){
error("Stream closed.");
}
in_s = resize_image(in, net.w, net.h);
in_s = letterbox_image(in, net.w, net.h);
return 0;
}
@@ -61,7 +61,7 @@ void *detect_in_thread(void *ptr)
if(l.type == DETECTION){
get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
} else if (l.type == REGION){
get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0, demo_hier_thresh);
get_region_boxes(l, in.w, in.h, demo_thresh, probs, boxes, 0, 0, demo_hier_thresh, 1);
} else {
error("Last layer must produce detections\n");
}
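Switching the demo from resize_image to letterbox_image keeps the frame's aspect ratio: the image is scaled to fit inside net.w x net.h and centered on a gray canvas instead of being stretched. A hedged sketch of that behavior built from darknet's image helpers (the real letterbox_image lives in image.c):

image letterbox_sketch(image im, int w, int h)
{
    int new_w = im.w;
    int new_h = im.h;
    if (((float)w/im.w) < ((float)h/im.h)) {
        new_w = w;                     /* width is the binding constraint */
        new_h = (im.h * w)/im.w;
    } else {
        new_h = h;                     /* height is the binding constraint */
        new_w = (im.w * h)/im.h;
    }
    image resized = resize_image(im, new_w, new_h);
    image boxed = make_image(w, h, im.c);
    fill_image(boxed, .5);             /* neutral gray padding */
    embed_image(resized, boxed, (w - new_w)/2, (h - new_h)/2);
    free_image(resized);
    return boxed;
}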

View File

@@ -46,11 +46,11 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
return l;
}
void forward_detection_layer(const detection_layer l, network_state state)
void forward_detection_layer(const detection_layer l, network net)
{
int locations = l.side*l.side;
int i,j;
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
//if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
int b;
if (l.softmax){
@@ -63,7 +63,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
}
}
}
if(state.train){
if(net.train){
float avg_iou = 0;
float avg_cat = 0;
float avg_allcat = 0;
@@ -77,7 +77,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
int index = b*l.inputs;
for (i = 0; i < locations; ++i) {
int truth_index = (b*locations + i)*(1+l.coords+l.classes);
int is_obj = state.truth[truth_index];
int is_obj = net.truth[truth_index];
for (j = 0; j < l.n; ++j) {
int p_index = index + locations*l.classes + i*l.n + j;
l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]);
@@ -95,13 +95,13 @@ void forward_detection_layer(const detection_layer l, network_state state)
int class_index = index + i*l.classes;
for(j = 0; j < l.classes; ++j) {
l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]);
*(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2);
if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
l.delta[class_index+j] = l.class_scale * (net.truth[truth_index+1+j] - l.output[class_index+j]);
*(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - l.output[class_index+j], 2);
if(net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
avg_allcat += l.output[class_index+j];
}
box truth = float_to_box(state.truth + truth_index + 1 + l.classes, 1);
box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1);
truth.x /= l.side;
truth.y /= l.side;
@@ -139,7 +139,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
best_index = 0;
}
}
if(l.random && *(state.net.seen) < 64000){
if(l.random && *(net.seen) < 64000){
best_index = rand()%l.n;
}
@@ -166,13 +166,13 @@ void forward_detection_layer(const detection_layer l, network_state state)
l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
}
l.delta[box_index+0] = l.coord_scale*(state.truth[tbox_index + 0] - l.output[box_index + 0]);
l.delta[box_index+1] = l.coord_scale*(state.truth[tbox_index + 1] - l.output[box_index + 1]);
l.delta[box_index+2] = l.coord_scale*(state.truth[tbox_index + 2] - l.output[box_index + 2]);
l.delta[box_index+3] = l.coord_scale*(state.truth[tbox_index + 3] - l.output[box_index + 3]);
l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]);
l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 1]);
l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]);
l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]);
if(l.sqrt){
l.delta[box_index+2] = l.coord_scale*(sqrt(state.truth[tbox_index + 2]) - l.output[box_index + 2]);
l.delta[box_index+3] = l.coord_scale*(sqrt(state.truth[tbox_index + 3]) - l.output[box_index + 3]);
l.delta[box_index+2] = l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]);
l.delta[box_index+3] = l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]);
}
*(l.cost) += pow(1-iou, 2);
@@ -216,9 +216,9 @@ void forward_detection_layer(const detection_layer l, network_state state)
}
}
void backward_detection_layer(const detection_layer l, network_state state)
void backward_detection_layer(const detection_layer l, network net)
{
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1);
}
void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
@@ -252,36 +252,25 @@ void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box
#ifdef GPU
void forward_detection_layer_gpu(const detection_layer l, network_state state)
void forward_detection_layer_gpu(const detection_layer l, network net)
{
if(!state.train){
copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
if(!net.train){
copy_ongpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1);
return;
}
float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
float *truth_cpu = 0;
if(state.truth){
int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes);
truth_cpu = calloc(num_truth, sizeof(float));
cuda_pull_array(state.truth, truth_cpu, num_truth);
}
cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);
network_state cpu_state = state;
cpu_state.train = state.train;
cpu_state.truth = truth_cpu;
cpu_state.input = in_cpu;
forward_detection_layer(l, cpu_state);
forward_detection_layer(l, net);
cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
free(cpu_state.input);
if(cpu_state.truth) free(cpu_state.truth);
}
void backward_detection_layer_gpu(detection_layer l, network_state state)
void backward_detection_layer_gpu(detection_layer l, network net)
{
axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1);
//copy_ongpu(l.batch*l.inputs, l.delta_gpu, 1, state.delta, 1);
axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1);
//copy_ongpu(l.batch*l.inputs, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif

View File

@@ -7,13 +7,13 @@
typedef layer detection_layer;
detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
void forward_detection_layer(const detection_layer l, network_state state);
void backward_detection_layer(const detection_layer l, network_state state);
void forward_detection_layer(const detection_layer l, network net);
void backward_detection_layer(const detection_layer l, network net);
void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
#ifdef GPU
void forward_detection_layer_gpu(const detection_layer l, network_state state);
void backward_detection_layer_gpu(detection_layer l, network_state state);
void forward_detection_layer_gpu(const detection_layer l, network net);
void backward_detection_layer_gpu(detection_layer l, network net);
#endif
#endif

View File

@@ -346,7 +346,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
network_predict(net, input.data);
int w = val[t].w;
int h = val[t].h;
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5);
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5, 0);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
if (coco){
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@@ -477,7 +477,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
network_predict(net, X);
int w = val[t].w;
int h = val[t].h;
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5);
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5, 0);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
if (coco){
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@@ -541,7 +541,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
image sized = resize_image(orig, net.w, net.h);
char *id = basecfg(path);
network_predict(net, sized.data);
get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0, .5);
get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0, .5, 0);
if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);
char labelpath[4096];
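Every get_region_boxes call now takes one more trailing integer. Judging only by the call sites (the live demo passes 1, the validators and test path pass 0), it reads like a relative-coordinates flag; treat that interpretation, and the helper below, as hypothetical. With relative output a caller would rescale boxes itself, roughly:

/* Hypothetical helper: map a relative box (fields in [0,1]) to pixel
   units of a w x h image. */
static box box_to_pixels(box b, int w, int h)
{
    b.x *= w; b.w *= w;
    b.y *= h; b.h *= h;
    return b;
}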
@@ -580,7 +580,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
}
}
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh)
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile)
{
list *options = read_data_cfg(datacfg);
char *name_list = option_find_str(options, "names", "data/names.list");
@@ -624,21 +624,26 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
time=clock();
network_predict(net, X);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh, 0);
if (l.softmax_tree && nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
draw_detections(sized, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
save_image(sized, "predictions");
show_image(sized, "predictions");
if(outfile){
save_image(sized, outfile);
}
else{
save_image(sized, "predictions");
show_image(sized, "predictions");
#ifdef OPENCV
cvWaitKey(0);
cvDestroyAllWindows();
#endif
}
free_image(im);
free_image(sized);
free(boxes);
free_ptrs((void **)probs, l.w*l.h*l.n);
#ifdef OPENCV
cvWaitKey(0);
cvDestroyAllWindows();
#endif
if (filename) break;
}
}
@@ -684,7 +689,7 @@ void run_detector(int argc, char **argv)
char *cfg = argv[4];
char *weights = (argc > 5) ? argv[5] : 0;
char *filename = (argc > 6) ? argv[6]: 0;
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh);
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile);
else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);

View File

@@ -35,26 +35,26 @@ void resize_dropout_layer(dropout_layer *l, int inputs)
#endif
}
void forward_dropout_layer(dropout_layer l, network_state state)
void forward_dropout_layer(dropout_layer l, network net)
{
int i;
if (!state.train) return;
if (!net.train) return;
for(i = 0; i < l.batch * l.inputs; ++i){
float r = rand_uniform(0, 1);
l.rand[i] = r;
if(r < l.probability) state.input[i] = 0;
else state.input[i] *= l.scale;
if(r < l.probability) net.input[i] = 0;
else net.input[i] *= l.scale;
}
}
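This is inverted dropout: l.scale is 1/(1 - probability), so surviving activations are scaled up during training and inference needs no correction. The same rule, standalone:

#include <stdlib.h>

static void dropout_sketch(float *x, int n, float p)
{
    float scale = 1.f/(1.f - p);       /* what darknet stores in l.scale */
    int i;
    for (i = 0; i < n; ++i) {
        float r = (float)rand()/RAND_MAX;
        if (r < p) x[i] = 0;           /* dropped */
        else x[i] *= scale;            /* kept, rescaled */
    }
}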
void backward_dropout_layer(dropout_layer l, network_state state)
void backward_dropout_layer(dropout_layer l, network net)
{
int i;
if(!state.delta) return;
if(!net.delta) return;
for(i = 0; i < l.batch * l.inputs; ++i){
float r = l.rand[i];
if(r < l.probability) state.delta[i] = 0;
else state.delta[i] *= l.scale;
if(r < l.probability) net.delta[i] = 0;
else net.delta[i] *= l.scale;
}
}

View File

@@ -8,13 +8,13 @@ typedef layer dropout_layer;
dropout_layer make_dropout_layer(int batch, int inputs, float probability);
void forward_dropout_layer(dropout_layer l, network_state state);
void backward_dropout_layer(dropout_layer l, network_state state);
void forward_dropout_layer(dropout_layer l, network net);
void backward_dropout_layer(dropout_layer l, network net);
void resize_dropout_layer(dropout_layer *l, int inputs);
#ifdef GPU
void forward_dropout_layer_gpu(dropout_layer l, network_state state);
void backward_dropout_layer_gpu(dropout_layer l, network_state state);
void forward_dropout_layer_gpu(dropout_layer l, network net);
void backward_dropout_layer_gpu(dropout_layer l, network net);
#endif
#endif

View File

@@ -14,9 +14,9 @@ __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand
if(id < size) input[id] = (rand[id] < prob) ? 0 : input[id]*scale;
}
void forward_dropout_layer_gpu(dropout_layer layer, network_state state)
void forward_dropout_layer_gpu(dropout_layer layer, network net)
{
if (!state.train) return;
if (!net.train) return;
int size = layer.inputs*layer.batch;
cuda_random(layer.rand_gpu, size);
/*
@@ -27,15 +27,15 @@ void forward_dropout_layer_gpu(dropout_layer layer, network_state state)
cuda_push_array(layer.rand_gpu, layer.rand, size);
*/
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale);
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale);
check_error(cudaPeekAtLastError());
}
void backward_dropout_layer_gpu(dropout_layer layer, network_state state)
void backward_dropout_layer_gpu(dropout_layer layer, network net)
{
if(!state.delta) return;
if(!net.delta_gpu) return;
int size = layer.inputs*layer.batch;
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.delta, size, layer.rand_gpu, layer.probability, layer.scale);
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale);
check_error(cudaPeekAtLastError());
}

View File

@@ -124,10 +124,10 @@ void update_gru_layer(layer l, int batch, float learning_rate, float momentum, f
update_connected_layer(*(l.output_layer), batch, learning_rate, momentum, decay);
}
void forward_gru_layer(layer l, network_state state)
void forward_gru_layer(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_z_layer = *(l.input_z_layer);
layer input_r_layer = *(l.input_r_layer);
@@ -144,7 +144,7 @@ void forward_gru_layer(layer l, network_state state)
fill_cpu(l.outputs * l.batch * l.steps, 0, state_z_layer.delta, 1);
fill_cpu(l.outputs * l.batch * l.steps, 0, state_r_layer.delta, 1);
fill_cpu(l.outputs * l.batch * l.steps, 0, state_h_layer.delta, 1);
if(state.train) {
if(net.train) {
fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1);
copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1);
}
@@ -154,7 +154,7 @@ void forward_gru_layer(layer l, network_state state)
forward_connected_layer(state_z_layer, s);
forward_connected_layer(state_r_layer, s);
s.input = state.input;
s.input = net.input;
forward_connected_layer(input_z_layer, s);
forward_connected_layer(input_r_layer, s);
forward_connected_layer(input_h_layer, s);
@@ -188,7 +188,7 @@ void forward_gru_layer(layer l, network_state state)
copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1);
state.input += l.inputs*l.batch;
net.input += l.inputs*l.batch;
l.output += l.outputs*l.batch;
increment_layer(&input_z_layer, 1);
increment_layer(&input_r_layer, 1);
@@ -200,7 +200,7 @@ void forward_gru_layer(layer l, network_state state)
}
}
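The refactor keeps the trick the GRU uses to drive its inner connected layers: hand each one a shallow copy of the network struct with input (and later delta) repointed, so the caller's fields are never clobbered. A toy, runnable model of the pattern (the struct is a stand-in, not darknet's):

#include <stdio.h>

typedef struct { float *input; int train; } net_t;  /* stand-in for network */

static void inner_forward(net_t s) { printf("consumes %g\n", s.input[0]); }

int main()
{
    float x[1] = {1}, state[1] = {2};
    net_t net = { x, 1 };
    net_t s = net;         /* shallow copy; real layers share weights/workspace */
    s.input = state;       /* inner layer reads the recurrent state */
    inner_forward(s);
    s.input = net.input;   /* then this timestep's external input */
    inner_forward(s);
    return 0;
}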
void backward_gru_layer(layer l, network_state state)
void backward_gru_layer(layer l, network net)
{
}
@@ -224,10 +224,10 @@ void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentu
update_connected_layer_gpu(*(l.state_h_layer), batch, learning_rate, momentum, decay);
}
void forward_gru_layer_gpu(layer l, network_state state)
void forward_gru_layer_gpu(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_z_layer = *(l.input_z_layer);
layer input_r_layer = *(l.input_r_layer);
@@ -244,17 +244,17 @@ void forward_gru_layer_gpu(layer l, network_state state)
fill_ongpu(l.outputs * l.batch * l.steps, 0, state_z_layer.delta_gpu, 1);
fill_ongpu(l.outputs * l.batch * l.steps, 0, state_r_layer.delta_gpu, 1);
fill_ongpu(l.outputs * l.batch * l.steps, 0, state_h_layer.delta_gpu, 1);
if(state.train) {
if(net.train) {
fill_ongpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1);
copy_ongpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1);
}
for (i = 0; i < l.steps; ++i) {
s.input = l.state_gpu;
s.input_gpu = l.state_gpu;
forward_connected_layer_gpu(state_z_layer, s);
forward_connected_layer_gpu(state_r_layer, s);
s.input = state.input;
s.input_gpu = net.input_gpu;
forward_connected_layer_gpu(input_z_layer, s);
forward_connected_layer_gpu(input_r_layer, s);
forward_connected_layer_gpu(input_h_layer, s);
@@ -272,7 +272,7 @@ void forward_gru_layer_gpu(layer l, network_state state)
copy_ongpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1);
mul_ongpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1);
s.input = l.forgot_state_gpu;
s.input_gpu = l.forgot_state_gpu;
forward_connected_layer_gpu(state_h_layer, s);
copy_ongpu(l.outputs*l.batch, input_h_layer.output_gpu, 1, l.h_gpu, 1);
@@ -288,7 +288,7 @@ void forward_gru_layer_gpu(layer l, network_state state)
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1);
state.input += l.inputs*l.batch;
net.input_gpu += l.inputs*l.batch;
l.output_gpu += l.outputs*l.batch;
increment_layer(&input_z_layer, 1);
increment_layer(&input_r_layer, 1);
@@ -300,10 +300,10 @@ void forward_gru_layer_gpu(layer l, network_state state)
}
}
void backward_gru_layer_gpu(layer l, network_state state)
void backward_gru_layer_gpu(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_z_layer = *(l.input_z_layer);
layer input_r_layer = *(l.input_r_layer);
@@ -321,8 +321,8 @@ void backward_gru_layer_gpu(layer l, network_state state)
increment_layer(&state_r_layer, l.steps - 1);
increment_layer(&state_h_layer, l.steps - 1);
state.input += l.inputs*l.batch*(l.steps-1);
if(state.delta) state.delta += l.inputs*l.batch*(l.steps-1);
net.input_gpu += l.inputs*l.batch*(l.steps-1);
if(net.delta_gpu) net.delta_gpu += l.inputs*l.batch*(l.steps-1);
l.output_gpu += l.outputs*l.batch*(l.steps-1);
l.delta_gpu += l.outputs*l.batch*(l.steps-1);
for (i = l.steps-1; i >= 0; --i) {
@@ -361,8 +361,8 @@ void backward_gru_layer_gpu(layer l, network_state state)
mul_ongpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1);
fill_ongpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1);
s.input = l.forgot_state_gpu;
s.delta = l.forgot_delta_gpu;
s.input_gpu = l.forgot_state_gpu;
s.delta_gpu = l.forgot_delta_gpu;
backward_connected_layer_gpu(state_h_layer, s);
if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu);
@@ -374,22 +374,22 @@ void backward_gru_layer_gpu(layer l, network_state state)
gradient_array_ongpu(l.z_gpu, l.outputs*l.batch, LOGISTIC, input_z_layer.delta_gpu);
copy_ongpu(l.outputs*l.batch, input_z_layer.delta_gpu, 1, state_z_layer.delta_gpu, 1);
s.input = l.prev_state_gpu;
s.delta = prev_delta_gpu;
s.input_gpu = l.prev_state_gpu;
s.delta_gpu = prev_delta_gpu;
backward_connected_layer_gpu(state_r_layer, s);
backward_connected_layer_gpu(state_z_layer, s);
s.input = state.input;
s.delta = state.delta;
s.input_gpu = net.input_gpu;
s.delta_gpu = net.delta_gpu;
backward_connected_layer_gpu(input_h_layer, s);
backward_connected_layer_gpu(input_r_layer, s);
backward_connected_layer_gpu(input_z_layer, s);
state.input -= l.inputs*l.batch;
if(state.delta) state.delta -= l.inputs*l.batch;
net.input_gpu -= l.inputs*l.batch;
if(net.delta_gpu) net.delta_gpu -= l.inputs*l.batch;
l.output_gpu -= l.outputs*l.batch;
l.delta_gpu -= l.outputs*l.batch;
increment_layer(&input_z_layer, -1);

View File

@@ -8,13 +8,13 @@
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
void forward_gru_layer(layer l, network_state state);
void backward_gru_layer(layer l, network_state state);
void forward_gru_layer(layer l, network net);
void backward_gru_layer(layer l, network net);
void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay);
#ifdef GPU
void forward_gru_layer_gpu(layer l, network_state state);
void backward_gru_layer_gpu(layer l, network_state state);
void forward_gru_layer_gpu(layer l, network net);
void backward_gru_layer_gpu(layer l, network net);
void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_gru_layer(layer l);
void pull_gru_layer(layer l);

View File

@@ -288,6 +288,25 @@ image image_distance(image a, image b)
return dist;
}
void ghost_image(image source, image dest, int dx, int dy)
{
int x,y,k;
float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5));
for(k = 0; k < source.c; ++k){
for(y = 0; y < source.h; ++y){
for(x = 0; x < source.w; ++x){
float dist = sqrt((x - source.w/2. + .5)*(x - source.w/2. + .5) + (y - source.h/2. + .5)*(y - source.h/2. + .5));
float alpha = (1 - dist/max_dist);
if(alpha < 0) alpha = 0;
float v1 = get_pixel(source, x,y,k);
float v2 = get_pixel(dest, dx+x,dy+y,k);
float val = alpha*v1 + (1-alpha)*v2;
set_pixel(dest, dx+x, dy+y, k, val);
}
}
}
}
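ghost_image is new: it pastes source into dest at offset (dx,dy), blending with a weight that falls off linearly with distance from the source's center, so the patch fades out toward its border. Hedged usage, assuming darknet's image I/O (file names are hypothetical):

image dst = load_image("scene.png", 0, 0, 3);   /* 0,0 keeps native size */
image src = load_image("patch.png", 0, 0, 3);
ghost_image(src, dst, 100, 80);                 /* feathered paste at (100,80) */
save_image(dst, "ghosted");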
void embed_image(image source, image dest, int dx, int dy)
{
int x,y,k;

View File

@@ -57,6 +57,7 @@ void normalize_image(image p);
image rotate_image(image m, float rad);
void rotate_image_cw(image im, int times);
void embed_image(image source, image dest, int dx, int dy);
void ghost_image(image source, image dest, int dx, int dy);
void place_image(image im, int w, int h, int dx, int dy, image canvas);
void saturate_image(image im, float sat);
void exposure_image(image im, float sat);

View File

@@ -5,7 +5,8 @@
#include "stddef.h"
#include "tree.h"
struct network_state;
struct network;
typedef struct network network;
struct layer;
typedef struct layer layer;
@@ -45,11 +46,11 @@ struct layer{
LAYER_TYPE type;
ACTIVATION activation;
COST_TYPE cost_type;
void (*forward) (struct layer, struct network_state);
void (*backward) (struct layer, struct network_state);
void (*forward) (struct layer, struct network);
void (*backward) (struct layer, struct network);
void (*update) (struct layer, int, float, float, float);
void (*forward_gpu) (struct layer, struct network_state);
void (*backward_gpu) (struct layer, struct network_state);
void (*forward_gpu) (struct layer, struct network);
void (*backward_gpu) (struct layer, struct network);
void (*update_gpu) (struct layer, int, float, float, float);
int batch_normalize;
int shortcut;
@@ -58,6 +59,8 @@ struct layer{
int flipped;
int inputs;
int outputs;
int nweights;
int nbiases;
int extra;
int truths;
int h,w,c;
@@ -176,6 +179,11 @@ struct layer{
float * m;
float * v;
float * bias_m;
float * bias_v;
float * scale_m;
float * scale_v;
float * z_cpu;
float * r_cpu;
@@ -216,6 +224,10 @@ struct layer{
float *m_gpu;
float *v_gpu;
float *bias_m_gpu;
float *scale_m_gpu;
float *bias_v_gpu;
float *scale_v_gpu;
float * prev_state_gpu;
float * forgot_state_gpu;

View File

@@ -88,7 +88,7 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
return l;
}
void forward_local_layer(const local_layer l, network_state state)
void forward_local_layer(const local_layer l, network net)
{
int out_h = local_out_height(l);
int out_w = local_out_width(l);
@@ -100,13 +100,13 @@ void forward_local_layer(const local_layer l, network_state state)
}
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
float *input = net.input + i*l.w*l.h*l.c;
im2col_cpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, state.workspace);
l.size, l.stride, l.pad, net.workspace);
float *output = l.output + i*l.outputs;
for(j = 0; j < locations; ++j){
float *a = l.weights + j*l.size*l.size*l.c*l.n;
float *b = state.workspace + j;
float *b = net.workspace + j;
float *c = output + j;
int m = l.n;
@@ -119,7 +119,7 @@ void forward_local_layer(const local_layer l, network_state state)
activate_array(l.output, l.outputs*l.batch, l.activation);
}
void backward_local_layer(local_layer l, network_state state)
void backward_local_layer(local_layer l, network net)
{
int i, j;
int locations = l.out_w*l.out_h;
@@ -131,13 +131,13 @@ void backward_local_layer(local_layer l, network_state state)
}
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
float *input = net.input + i*l.w*l.h*l.c;
im2col_cpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, state.workspace);
l.size, l.stride, l.pad, net.workspace);
for(j = 0; j < locations; ++j){
float *a = l.delta + i*l.outputs + j;
float *b = state.workspace + j;
float *b = net.workspace + j;
float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
int m = l.n;
int n = l.size*l.size*l.c;
@@ -146,11 +146,11 @@ void backward_local_layer(local_layer l, network_state state)
gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
}
if(state.delta){
if(net.delta){
for(j = 0; j < locations; ++j){
float *a = l.weights + j*l.size*l.size*l.c*l.n;
float *b = l.delta + i*l.outputs + j;
float *c = state.workspace + j;
float *c = net.workspace + j;
int m = l.size*l.size*l.c;
int n = 1;
@@ -159,7 +159,7 @@ void backward_local_layer(local_layer l, network_state state)
gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
}
col2im_cpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w);
}
}
}
@@ -178,7 +178,7 @@ void update_local_layer(local_layer l, int batch, float learning_rate, float mom
#ifdef GPU
void forward_local_layer_gpu(const local_layer l, network_state state)
void forward_local_layer_gpu(const local_layer l, network net)
{
int out_h = local_out_height(l);
int out_w = local_out_width(l);
@@ -190,13 +190,13 @@ void forward_local_layer_gpu(const local_layer l, network_state state)
}
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
float *input = net.input_gpu + i*l.w*l.h*l.c;
im2col_ongpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, state.workspace);
l.size, l.stride, l.pad, net.workspace);
float *output = l.output_gpu + i*l.outputs;
for(j = 0; j < locations; ++j){
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
float *b = state.workspace + j;
float *b = net.workspace + j;
float *c = output + j;
int m = l.n;
@@ -209,7 +209,7 @@ void forward_local_layer_gpu(const local_layer l, network_state state)
activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
void backward_local_layer_gpu(local_layer l, network_state state)
void backward_local_layer_gpu(local_layer l, network net)
{
int i, j;
int locations = l.out_w*l.out_h;
@@ -220,13 +220,13 @@ void backward_local_layer_gpu(local_layer l, network_state state)
}
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
float *input = net.input_gpu + i*l.w*l.h*l.c;
im2col_ongpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, state.workspace);
l.size, l.stride, l.pad, net.workspace);
for(j = 0; j < locations; ++j){
float *a = l.delta_gpu + i*l.outputs + j;
float *b = state.workspace + j;
float *b = net.workspace + j;
float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n;
int m = l.n;
int n = l.size*l.size*l.c;
@@ -235,11 +235,11 @@ void backward_local_layer_gpu(local_layer l, network_state state)
gemm_ongpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
}
if(state.delta){
if(net.delta_gpu){
for(j = 0; j < locations; ++j){
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
float *b = l.delta_gpu + i*l.outputs + j;
float *c = state.workspace + j;
float *c = net.workspace + j;
int m = l.size*l.size*l.c;
int n = 1;
@@ -248,7 +248,7 @@ void backward_local_layer_gpu(local_layer l, network_state state)
gemm_ongpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
}
col2im_ongpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
col2im_ongpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu+i*l.c*l.h*l.w);
}
}
}

View File

@@ -10,8 +10,8 @@
typedef layer local_layer;
#ifdef GPU
void forward_local_layer_gpu(local_layer layer, network_state state);
void backward_local_layer_gpu(local_layer layer, network_state state);
void forward_local_layer_gpu(local_layer layer, network net);
void backward_local_layer_gpu(local_layer layer, network net);
void update_local_layer_gpu(local_layer layer, int batch, float learning_rate, float momentum, float decay);
void push_local_layer(local_layer layer);
@@ -20,8 +20,8 @@ void pull_local_layer(local_layer layer);
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation);
void forward_local_layer(const local_layer layer, network_state state);
void backward_local_layer(local_layer layer, network_state state);
void forward_local_layer(const local_layer layer, network net);
void backward_local_layer(local_layer layer, network net);
void update_local_layer(local_layer layer, int batch, float learning_rate, float momentum, float decay);
void bias_output(float *output, float *biases, int batch, int n, int size);

258
src/lsd.c
View File

@@ -4,6 +4,7 @@
#include "parser.h"
#include "blas.h"
/*
void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear)
{
#ifdef GPU
@@ -58,36 +59,21 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg
float aloss_avg = -1;
float floss_avg = -1;
network_state fstate = {0};
fstate.index = 0;
fstate.net = fnet;
int x_size = get_network_input_size(fnet)*fnet.batch;
int y_size = get_network_output_size(fnet)*fnet.batch;
fstate.input = cuda_make_array(0, x_size);
fstate.truth = cuda_make_array(0, y_size);
fstate.delta = cuda_make_array(0, x_size);
fstate.train = 1;
fnet.train=1;
int x_size = fnet.inputs*fnet.batch;
int y_size = fnet.truths*fnet.batch;
float *X = calloc(x_size, sizeof(float));
float *y = calloc(y_size, sizeof(float));
float *ones = cuda_make_array(0, anet.batch);
fill_ongpu(anet.batch, .9, ones, 1);
network_state astate = {0};
astate.index = 0;
astate.net = anet;
int ax_size = get_network_input_size(anet)*anet.batch;
int ay_size = get_network_output_size(anet)*anet.batch;
astate.input = 0;
astate.truth = ones;
astate.delta = cuda_make_array(0, ax_size);
astate.train = 1;
int ax_size = anet.inputs*anet.batch;
int ay_size = anet.truths*anet.batch;
fill_ongpu(ay_size, .9, anet.truth_gpu, 1);
anet.delta_gpu = cuda_make_array(0, ax_size);
anet.train = 1;
network_state gstate = {0};
gstate.index = 0;
gstate.net = gnet;
int gx_size = get_network_input_size(gnet)*gnet.batch;
int gy_size = get_network_output_size(gnet)*gnet.batch;
int gx_size = gnet.inputs*gnet.batch;
int gy_size = gnet.truths*gnet.batch;
gstate.input = cuda_make_array(0, gx_size);
gstate.truth = 0;
gstate.delta = 0;
@@ -155,7 +141,7 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg
floss += get_network_cost(fnet) /(fnet.subdivisions*fnet.batch);
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch);
for(k = 0; k < gnet.batch; ++k){
int index = j*gnet.batch + k;
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1);
@@ -164,11 +150,13 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg
}
}
*/
/*
image sim = float_to_image(anet.w, anet.h, anet.c, style.X.vals[j]);
show_image(sim, "style");
cvWaitKey(0);
*/
/*
harmless_update_network_gpu(anet);
@@ -205,7 +193,9 @@ void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg
}
#endif
}
*/
/*
void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear)
{
#ifdef GPU
@@ -319,7 +309,8 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear
get_next_batch(train, net.batch, j*net.batch, pixs, y);
get_next_batch(gray, net.batch, j*net.batch, graypixs, y);
cuda_push_array(gstate.input, graypixs, x_size);
cuda_push_array(gstate.truth, pixs, x_size);
cuda_push_array(gstate.truth, pixs, y_size);
*/
/*
image origi = float_to_image(net.w, net.h, 3, pixs);
image grayi = float_to_image(net.w, net.h, 3, graypixs);
@@ -327,6 +318,7 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear
show_image(origi, "orig");
cvWaitKey(0);
*/
/*
*net.seen += net.batch;
forward_network_gpu(net, gstate);
@@ -350,7 +342,7 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear
gloss += get_network_cost(net) /(net.subdivisions*net.batch);
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch);
for(k = 0; k < net.batch; ++k){
int index = j*net.batch + k;
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
@@ -393,6 +385,7 @@ void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear
save_weights(net, buff);
#endif
}
*/
void test_dcgan(char *cfgfile, char *weightfile)
{
@@ -428,7 +421,7 @@ void test_dcgan(char *cfgfile, char *weightfile)
network_predict(net, X);
image out = get_network_image_layer(net, imlayer);
//yuv_to_rgb(out);
constrain_image(out);
normalize_image(out);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
show_image(out, "out");
save_image(out, "out");
@@ -440,33 +433,43 @@ void test_dcgan(char *cfgfile, char *weightfile)
}
}
void dcgan_batch(network gnet, network anet)
{
//float *input = calloc(x_size, sizeof(float));
}
void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear)
void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images)
{
#ifdef GPU
//char *train_images = "/home/pjreddie/data/coco/train1.txt";
//char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
//char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
//char *train_images = "data/64.txt";
//char *train_images = "data/alp.txt";
//char *train_images = "data/cifar.txt";
char *backup_directory = "/home/pjreddie/backup/";
srand(time(0));
char *base = basecfg(cfg);
char *abase = basecfg(acfg);
printf("%s\n", base);
network net = load_network(cfg, weight, clear);
network gnet = load_network(cfg, weight, clear);
network anet = load_network(acfg, aweight, clear);
float orig_rate = anet.learning_rate;
int start = 0;
int i, j, k;
layer imlayer = {0};
for (i = 0; i < net.n; ++i) {
if (net.layers[i].out_c == 3) {
imlayer = net.layers[i];
for (i = 0; i < gnet.n; ++i) {
if (gnet.layers[i].out_c == 3) {
imlayer = gnet.layers[i];
break;
}
}
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = net.batch*net.subdivisions;
i = *net.seen/imgs;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet.learning_rate, gnet.momentum, gnet.decay);
int imgs = gnet.batch*gnet.subdivisions;
i = *gnet.seen/imgs;
data train, buffer;
@@ -480,131 +483,144 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear)
args.m = plist->size;
args.d = &buffer;
args.type = CLASSIFICATION_DATA;
args.classes = 2;
args.threads=16;
args.classes = 1;
char *ls[2] = {"imagenet", "zzzzzzzz"};
args.labels = ls;
pthread_t load_thread = load_data_in_thread(args);
clock_t time;
network_state gstate = {0};
gstate.index = 0;
gstate.net = net;
int x_size = get_network_input_size(net)*net.batch;
int y_size = get_network_output_size(net)*net.batch;
gstate.input = cuda_make_array(0, x_size);
gstate.truth = cuda_make_array(0, y_size);
gstate.train = 1;
float *input = calloc(x_size, sizeof(float));
float *y = calloc(y_size, sizeof(float));
gnet.train = 1;
anet.train = 1;
int x_size = gnet.inputs*gnet.batch;
int y_size = gnet.truths*gnet.batch;
float *imerror = cuda_make_array(0, y_size);
network_state astate = {0};
astate.index = 0;
astate.net = anet;
int ay_size = get_network_output_size(anet)*anet.batch;
astate.input = 0;
astate.truth = 0;
astate.delta = 0;
astate.train = 1;
float *ones_gpu = cuda_make_array(0, ay_size);
fill_ongpu(ay_size, .1, ones_gpu, 1);
fill_ongpu(ay_size/2, .9, ones_gpu, 2);
int ay_size = anet.truths*anet.batch;
float aloss_avg = -1;
//data generated = copy_data(train);
while (get_current_batch(net) < net.max_batches) {
while (get_current_batch(gnet) < gnet.max_batches) {
start += 1;
i += 1;
time=clock();
pthread_join(load_thread, 0);
train = buffer;
//translate_data_rows(train, -.5);
//scale_data_rows(train, 2);
load_thread = load_data_in_thread(args);
printf("Loaded: %lf seconds\n", sec(clock()-time));
data gen = copy_data(train);
for(j = 0; j < imgs; ++j){
train.y.vals[j][0] = .9;
train.y.vals[j][1] = .1;
gen.y.vals[j][0] = .1;
gen.y.vals[j][1] = .9;
for (j = 0; j < imgs; ++j) {
train.y.vals[j][0] = .95;
gen.y.vals[j][0] = .05;
}
time=clock();
for(j = 0; j < net.subdivisions; ++j){
get_next_batch(train, net.batch, j*net.batch, y, 0);
for(j = 0; j < gnet.subdivisions; ++j){
get_next_batch(train, gnet.batch, j*gnet.batch, gnet.truth, 0);
int z;
for(z = 0; z < x_size; ++z){
input[z] = rand_normal();
gnet.input[z] = rand_normal();
}
cuda_push_array(gstate.input, input, x_size);
cuda_push_array(gstate.truth, y, y_size);
*net.seen += net.batch;
forward_network_gpu(net, gstate);
cuda_push_array(gnet.input_gpu, gnet.input, x_size);
cuda_push_array(gnet.truth_gpu, gnet.truth, y_size);
*gnet.seen += gnet.batch;
forward_network_gpu(gnet);
fill_ongpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
astate.input = imlayer.output_gpu;
astate.delta = imerror;
astate.truth = ones_gpu;
forward_network_gpu(anet, astate);
backward_network_gpu(anet, astate);
fill_ongpu(anet.truths*anet.batch, .95, anet.truth_gpu, 1);
copy_ongpu(anet.inputs*anet.batch, imlayer.output_gpu, 1, anet.input_gpu, 1);
anet.delta_gpu = imerror;
forward_network_gpu(anet);
backward_network_gpu(anet);
float genaloss = *anet.cost / anet.batch;
printf("%f\n", genaloss);
scal_ongpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
scal_ongpu(imlayer.outputs*imlayer.batch, .001, net.layers[net.n-1].delta_gpu, 1);
scal_ongpu(imlayer.outputs*imlayer.batch, .00, gnet.layers[gnet.n-1].delta_gpu, 1);
printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch));
printf("features %f\n", cuda_mag_array(net.layers[net.n-1].delta_gpu, imlayer.outputs*imlayer.batch));
printf("features %f\n", cuda_mag_array(gnet.layers[gnet.n-1].delta_gpu, imlayer.outputs*imlayer.batch));
axpy_ongpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net.layers[net.n-1].delta_gpu, 1);
axpy_ongpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet.layers[gnet.n-1].delta_gpu, 1);
backward_network_gpu(net, gstate);
backward_network_gpu(gnet);
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
for(k = 0; k < net.batch; ++k){
int index = j*net.batch + k;
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gen.X.vals[index], 1);
gen.y.vals[index][0] = .1;
for(k = 0; k < gnet.batch; ++k){
int index = j*gnet.batch + k;
copy_cpu(gnet.outputs, gnet.output + k*gnet.outputs, 1, gen.X.vals[index], 1);
}
}
harmless_update_network_gpu(anet);
data merge = concat_data(train, gen);
randomize_data(merge);
//randomize_data(merge);
float aloss = train_network(anet, merge);
update_network_gpu(net);
//translate_image(im, 1);
//scale_image(im, .5);
//translate_image(im2, 1);
//scale_image(im2, .5);
#ifdef OPENCV
if(display){
image im = float_to_image(anet.w, anet.h, anet.c, gen.X.vals[0]);
image im2 = float_to_image(anet.w, anet.h, anet.c, train.X.vals[0]);
show_image(im, "gen");
show_image(im2, "train");
cvWaitKey(50);
}
#endif
/*
if(aloss < .1){
anet.learning_rate = 0;
} else if (aloss > .3){
anet.learning_rate = orig_rate;
}
*/
update_network_gpu(gnet);
free_data(merge);
free_data(train);
free_data(gen);
if (aloss_avg < 0) aloss_avg = aloss;
aloss_avg = aloss_avg*.9 + aloss*.1;
printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs);
printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs);
if(i%1000==0){
char buff[256];
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
save_weights(gnet, buff);
sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
save_weights(anet, buff);
}
if(i%100==0){
char buff[256];
sprintf(buff, "%s/%s.backup", backup_directory, base);
save_weights(net, buff);
save_weights(gnet, buff);
sprintf(buff, "%s/%s.backup", backup_directory, abase);
save_weights(anet, buff);
}
}
char buff[256];
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
save_weights(net, buff);
save_weights(gnet, buff);
#endif
}
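The rewritten loop is a textbook GAN generator step: sample noise into gnet.input, run the generator, score the fake batch with the discriminator against smoothed "real" labels (.95), and inject the discriminator's input gradient (imerror) into the generator's last layer before backpropagating. The skeleton, using only the darknet GPU calls that appear above (not a standalone program):

forward_network_gpu(gnet);                                   /* G(z) */
fill_ongpu(anet.truths*anet.batch, .95, anet.truth_gpu, 1);  /* "real" target */
copy_ongpu(anet.inputs*anet.batch, imlayer.output_gpu, 1, anet.input_gpu, 1);
anet.delta_gpu = imerror;                     /* capture dLoss_D / dG(z) */
forward_network_gpu(anet);
backward_network_gpu(anet);
axpy_ongpu(imlayer.outputs*imlayer.batch, 1, imerror, 1,
           gnet.layers[gnet.n-1].delta_gpu, 1);   /* feed into G's delta */
backward_network_gpu(gnet);
update_network_gpu(gnet);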
/*
void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear)
{
#ifdef GPU
@@ -645,8 +661,8 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
args.d = &buffer;
args.type = CLASSIFICATION_DATA;
args.classes = 2;
char *ls[2] = {"imagenet", "zzzzzzz"};
args.classes = 1;
char *ls[2] = {"imagenet"};
args.labels = ls;
pthread_t load_thread = load_data_in_thread(args);
@@ -676,8 +692,7 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
float *imerror = cuda_make_array(0, imlayer.outputs*imlayer.batch);
float *ones_gpu = cuda_make_array(0, ay_size);
fill_ongpu(ay_size, .1, ones_gpu, 1);
fill_ongpu(ay_size/2, .9, ones_gpu, 2);
fill_ongpu(ay_size, 1, ones_gpu, 1);
float aloss_avg = -1;
float gloss_avg = -1;
@@ -697,10 +712,8 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
for(j = 0; j < imgs; ++j){
image gim = float_to_image(net.w, net.h, net.c, gray.X.vals[j]);
grayscale_image_3c(gim);
train.y.vals[j][0] = .9;
train.y.vals[j][1] = .1;
gray.y.vals[j][0] = .1;
gray.y.vals[j][1] = .9;
train.y.vals[j][0] = 1;
gray.y.vals[j][0] = 0;
}
time=clock();
float gloss = 0;
@@ -709,14 +722,16 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
get_next_batch(train, net.batch, j*net.batch, pixs, 0);
get_next_batch(gray, net.batch, j*net.batch, graypixs, 0);
cuda_push_array(gstate.input, graypixs, x_size);
cuda_push_array(gstate.truth, pixs, x_size);
/*
image origi = float_to_image(net.w, net.h, 3, pixs);
image grayi = float_to_image(net.w, net.h, 3, graypixs);
show_image(grayi, "gray");
show_image(origi, "orig");
cvWaitKey(0);
cuda_push_array(gstate.truth, pixs, y_size);
*/
/*
image origi = float_to_image(net.w, net.h, 3, pixs);
image grayi = float_to_image(net.w, net.h, 3, graypixs);
show_image(grayi, "gray");
show_image(origi, "orig");
cvWaitKey(0);
*/
/*
*net.seen += net.batch;
forward_network_gpu(net, gstate);
@@ -727,7 +742,7 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
forward_network_gpu(anet, astate);
backward_network_gpu(anet, astate);
scal_ongpu(imlayer.outputs*imlayer.batch, 1./1000., net.layers[net.n-1].delta_gpu, 1);
scal_ongpu(imlayer.outputs*imlayer.batch, 1./100., net.layers[net.n-1].delta_gpu, 1);
scal_ongpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
@@ -741,7 +756,7 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
gloss += get_network_cost(net) /(net.subdivisions*net.batch);
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch);
for(k = 0; k < net.batch; ++k){
int index = j*net.batch + k;
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
@@ -750,7 +765,7 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
harmless_update_network_gpu(anet);
data merge = concat_data(train, gray);
randomize_data(merge);
//randomize_data(merge);
float aloss = train_network(anet, merge);
update_network_gpu(net);
@@ -782,7 +797,9 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
save_weights(net, buff);
#endif
}
*/
/*
void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear)
{
#ifdef GPU
@@ -913,7 +930,7 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi
gloss += get_network_cost(net) /(net.subdivisions*net.batch);
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch);
for(k = 0; k < net.batch; ++k){
int index = j*net.batch + k;
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1);
@@ -956,7 +973,9 @@ void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfi
save_weights(net, buff);
#endif
}
*/
/*
void train_lsd(char *cfgfile, char *weightfile, int clear)
{
char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
@@ -1035,6 +1054,7 @@ void train_lsd(char *cfgfile, char *weightfile, int clear)
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
save_weights(net, buff);
}
*/
void test_lsd(char *cfgfile, char *weightfile, char *filename, int gray)
{
@@ -1103,18 +1123,20 @@ void run_lsd(int argc, char **argv)
}
int clear = find_arg(argc, argv, "-clear");
int display = find_arg(argc, argv, "-display");
char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list");
char *cfg = argv[3];
char *weights = (argc > 4) ? argv[4] : 0;
char *filename = (argc > 5) ? argv[5] : 0;
char *acfg = argv[5];
char *aweights = (argc > 6) ? argv[6] : 0;
if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear);
else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear);
else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear);
else if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear);
//if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear);
//else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear);
//else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear);
//else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear);
if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file);
else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights);
else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear);
else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0);
else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1);
/*

View File

@@ -76,7 +76,7 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
#endif
}
void forward_maxpool_layer(const maxpool_layer l, network_state state)
void forward_maxpool_layer(const maxpool_layer l, network net)
{
int b,i,j,k,m,n;
int w_offset = -l.pad;
@@ -100,7 +100,7 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state)
int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c));
int valid = (cur_h >= 0 && cur_h < l.h &&
cur_w >= 0 && cur_w < l.w);
float val = (valid != 0) ? state.input[index] : -FLT_MAX;
float val = (valid != 0) ? net.input[index] : -FLT_MAX;
max_i = (val > max) ? index : max_i;
max = (val > max) ? val : max;
}
@@ -113,7 +113,7 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state)
}
}
void backward_maxpool_layer(const maxpool_layer l, network_state state)
void backward_maxpool_layer(const maxpool_layer l, network net)
{
int i;
int h = l.out_h;
@@ -121,7 +121,7 @@ void backward_maxpool_layer(const maxpool_layer l, network_state state)
int c = l.c;
for(i = 0; i < h*w*c*l.batch; ++i){
int index = l.indexes[i];
state.delta[index] += l.delta[i];
net.delta[index] += l.delta[i];
}
}
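Max pooling records, for each output cell, the flat input index of the winning element (l.indexes), so the backward pass is a pure scatter. Standalone sketch:

/* din must start zeroed (darknet clears layer deltas each pass). */
static void maxpool_backward_sketch(const float *dout, const int *argmax,
                                    int n, float *din)
{
    int i;
    for (i = 0; i < n; ++i) {
        din[argmax[i]] += dout[i];     /* route gradient to the argmax */
    }
}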

View File

@@ -11,12 +11,12 @@ typedef layer maxpool_layer;
image get_maxpool_image(maxpool_layer l);
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
void resize_maxpool_layer(maxpool_layer *l, int w, int h);
void forward_maxpool_layer(const maxpool_layer l, network_state state);
void backward_maxpool_layer(const maxpool_layer l, network_state state);
void forward_maxpool_layer(const maxpool_layer l, network net);
void backward_maxpool_layer(const maxpool_layer l, network net);
#ifdef GPU
void forward_maxpool_layer_gpu(maxpool_layer l, network_state state);
void backward_maxpool_layer_gpu(maxpool_layer l, network_state state);
void forward_maxpool_layer_gpu(maxpool_layer l, network net);
void backward_maxpool_layer_gpu(maxpool_layer l, network net);
#endif
#endif

View File

@@ -84,7 +84,7 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
prev_delta[index] += d;
}
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net)
{
int h = layer.out_h;
int w = layer.out_w;
@@ -92,15 +92,15 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta
size_t n = h*w*c*layer.batch;
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu);
check_error(cudaPeekAtLastError());
}
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net)
{
size_t n = layer.h*layer.w*layer.c*layer.batch;
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu);
check_error(cudaPeekAtLastError());
}

View File

@@ -164,26 +164,26 @@ network make_network(int n)
net.n = n;
net.layers = calloc(net.n, sizeof(layer));
net.seen = calloc(1, sizeof(int));
#ifdef GPU
net.input_gpu = calloc(1, sizeof(float *));
net.truth_gpu = calloc(1, sizeof(float *));
#endif
net.cost = calloc(1, sizeof(float));
return net;
}
void forward_network(network net, network_state state)
void forward_network(network net)
{
state.workspace = net.workspace;
int i;
for(i = 0; i < net.n; ++i){
state.index = i;
net.index = i;
layer l = net.layers[i];
if(l.delta){
fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
}
l.forward(l, state);
state.input = l.output;
l.forward(l, net);
net.input = l.output;
if(l.truth) {
net.truth = l.output;
}
}
calc_network_cost(net);
}
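With network_state gone, forward_network threads data through the net struct itself: each layer consumes net.input, and the loop repoints net.input at the layer's output. A toy, runnable version of that dispatch (structs are stand-ins for darknet's layer/network):

#include <stdio.h>

typedef struct layer_s layer_t;
typedef struct { int n; layer_t *layers; float *input; } net_t;
struct layer_s { float *output; void (*forward)(layer_t, net_t); };

static void square_fwd(layer_t l, net_t net) { l.output[0] = net.input[0]*net.input[0]; }

int main()
{
    float x = 3, y1, y2;
    layer_t ls[2] = { { &y1, square_fwd }, { &y2, square_fwd } };
    net_t net = { 2, ls, &x };
    int i;
    for (i = 0; i < net.n; ++i) {
        layer_t l = net.layers[i];
        l.forward(l, net);
        net.input = l.output;   /* the next layer reads this output */
    }
    printf("%g\n", y2);         /* ((3)^2)^2 = 81 */
    return 0;
}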
void update_network(network net)
@@ -199,17 +199,7 @@ void update_network(network net)
}
}
float *get_network_output(network net)
{
#ifdef GPU
if (gpu_index >= 0) return get_network_output_gpu(net);
#endif
int i;
for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break;
return net.layers[i].output;
}
float get_network_cost(network net)
void calc_network_cost(network net)
{
int i;
float sum = 0;
@@ -220,54 +210,43 @@ float get_network_cost(network net)
++count;
}
}
return sum/count;
*net.cost = sum/count;
}
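get_network_cost's return value becomes a side effect: the mean over every layer that reports a cost is stored in *net.cost at the end of each forward pass. The same computation, standalone (assumes darknet's convention that layers without a loss leave l.cost NULL):

static float mean_cost(float **costs, int n)
{
    float sum = 0;
    int i, count = 0;
    for (i = 0; i < n; ++i) {
        if (costs[i]) {                 /* only loss-bearing layers */
            sum += costs[i][0];
            ++count;
        }
    }
    return count ? sum/count : 0;
}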
int get_predicted_class_network(network net)
{
float *out = get_network_output(net);
int k = get_network_output_size(net);
return max_index(out, k);
return max_index(net.output, net.outputs);
}
void backward_network(network net, network_state state)
void backward_network(network net)
{
int i;
float *original_input = state.input;
float *original_delta = state.delta;
state.workspace = net.workspace;
network orig = net;
for(i = net.n-1; i >= 0; --i){
state.index = i;
if(i == 0){
state.input = original_input;
state.delta = original_delta;
}else{
layer prev = net.layers[i-1];
state.input = prev.output;
state.delta = prev.delta;
}
layer l = net.layers[i];
if(l.stopbackward) break;
l.backward(l, state);
if(i == 0){
net = orig;
}else{
layer prev = net.layers[i-1];
net.input = prev.output;
net.delta = prev.delta;
}
net.index = i;
l.backward(l, net);
}
}
float train_network_datum(network net, float *x, float *y)
float train_network_datum(network net)
{
#ifdef GPU
if(gpu_index >= 0) return train_network_datum_gpu(net, x, y);
if(gpu_index >= 0) return train_network_datum_gpu(net);
#endif
network_state state;
*net.seen += net.batch;
state.index = 0;
state.net = net;
state.input = x;
state.delta = 0;
state.truth = y;
state.train = 1;
forward_network(net, state);
backward_network(net, state);
float error = get_network_cost(net);
net.train = 1;
forward_network(net);
backward_network(net);
float error = *net.cost;
if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net);
return error;
}
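Training data now travels through buffers the network owns (net.input and net.truth, sized net.inputs*net.batch and net.truths*net.batch), which is why get_random_batch/get_next_batch below write straight into them. One step under the new API, as a hedged sketch (assumes those buffers were allocated when the network was built):

#include <string.h>

/* X and y hold one batch of inputs and labels. */
memcpy(net.input, X, net.inputs*net.batch*sizeof(float));
memcpy(net.truth, y, net.truths*net.batch*sizeof(float));
float loss = train_network_datum(net);   /* forward + backward (+ update) */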
@@ -275,18 +254,14 @@ float train_network_datum(network net, float *x, float *y)
float train_network_sgd(network net, data d, int n)
{
int batch = net.batch;
float *X = calloc(batch*d.X.cols, sizeof(float));
float *y = calloc(batch*d.y.cols, sizeof(float));
int i;
float sum = 0;
for(i = 0; i < n; ++i){
get_random_batch(d, batch, X, y);
float err = train_network_datum(net, X, y);
get_random_batch(d, batch, net.input, net.truth);
float err = train_network_datum(net);
sum += err;
}
free(X);
free(y);
return (float)sum/(n*batch);
}
@@ -295,43 +270,14 @@ float train_network(network net, data d)
assert(d.X.rows % net.batch == 0);
int batch = net.batch;
int n = d.X.rows / batch;
float *X = calloc(batch*d.X.cols, sizeof(float));
float *y = calloc(batch*d.y.cols, sizeof(float));
int i;
float sum = 0;
for(i = 0; i < n; ++i){
get_next_batch(d, batch, i*batch, X, y);
float err = train_network_datum(net, X, y);
get_next_batch(d, batch, i*batch, net.input, net.truth);
float err = train_network_datum(net);
sum += err;
}
free(X);
free(y);
return (float)sum/(n*batch);
}
float train_network_batch(network net, data d, int n)
{
int i,j;
network_state state;
state.index = 0;
state.net = net;
state.train = 1;
state.delta = 0;
float sum = 0;
int batch = 2;
for(i = 0; i < n; ++i){
for(j = 0; j < batch; ++j){
int index = rand()%d.X.rows;
state.input = d.X.vals[index];
state.truth = d.y.vals[index];
forward_network(net, state);
backward_network(net, state);
sum += get_network_cost(net);
}
update_network(net);
}
return (float)sum/(n*batch);
}
@@ -353,9 +299,7 @@ int resize_network(network *net, int w, int h)
{
#ifdef GPU
cuda_set_device(net->gpu_index);
if(gpu_index >= 0){
cuda_free(net->workspace);
}
cuda_free(net->workspace);
#endif
int i;
//if(w == net->w && h == net->h) return 0;
@@ -395,14 +339,22 @@ int resize_network(network *net, int w, int h)
h = l.out_h;
if(l.type == AVGPOOL) break;
}
layer out = get_network_output_layer(*net);
net->inputs = net->layers[0].inputs;
net->outputs = out.outputs;
net->truths = out.outputs;
if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths;
net->output = out.output;
free(net->input);
free(net->truth);
net->input = calloc(net->inputs*net->batch, sizeof(float));
net->truth = calloc(net->truths*net->batch, sizeof(float));
#ifdef GPU
if(gpu_index >= 0){
if(net->input_gpu) {
cuda_free(*net->input_gpu);
*net->input_gpu = 0;
cuda_free(*net->truth_gpu);
*net->truth_gpu = 0;
}
cuda_free(net->input_gpu);
cuda_free(net->truth_gpu);
net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch);
net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch);
net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
}else {
free(net->workspace);
@@ -416,18 +368,6 @@ int resize_network(network *net, int w, int h)
return 0;
}
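resize_network now leaves the network fully consistent after a geometry change: every layer is resized in order, then net->input, net->truth, their GPU mirrors, and the shared workspace are reallocated for the new sizes. Hedged usage:

resize_network(&net, 608, 608);   /* e.g. move a loaded model to 608x608 input */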
int get_network_output_size(network net)
{
int i;
for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break;
return net.layers[i].outputs;
}
int get_network_input_size(network net)
{
return net.layers[0].inputs;
}
detection_layer get_network_detection_layer(network net)
{
int i;
@@ -444,9 +384,9 @@ detection_layer get_network_detection_layer(network net)
image get_network_image_layer(network net, int i)
{
layer l = net.layers[i];
#ifdef GPU
cuda_pull_array(l.output_gpu, l.output, l.outputs);
#endif
#ifdef GPU
cuda_pull_array(l.output_gpu, l.output, l.outputs);
#endif
if (l.out_w && l.out_h && l.out_c){
return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
}
@@ -481,9 +421,7 @@ void visualize_network(network net)
void top_predictions(network net, int k, int *index)
{
int size = get_network_output_size(net);
float *out = get_network_output(net);
top_k(out, size, k, index);
top_k(net.output, net.outputs, k, index);
}
@@ -492,23 +430,18 @@ float *network_predict(network net, float *input)
#ifdef GPU
if(gpu_index >= 0) return network_predict_gpu(net, input);
#endif
network_state state;
state.net = net;
state.index = 0;
state.input = input;
state.truth = 0;
state.train = 0;
state.delta = 0;
forward_network(net, state);
float *out = get_network_output(net);
return out;
net.input = input;
net.truth = 0;
net.train = 0;
net.delta = 0;
forward_network(net);
return net.output;
}
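
With the state struct gone, prediction is a plain pointer hand-off: net.input aliases the caller's buffer for the forward pass and net.output aliases the last non-COST layer's output. A hedged usage sketch (X and indexes are assumed caller-side names):

/* Classify one input and report the top 5 classes. */
int indexes[5];
float *p = network_predict(net, X);
top_k(p, net.outputs, 5, indexes);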
matrix network_predict_data_multi(network net, data test, int n)
{
int i,j,b,m;
int k = get_network_output_size(net);
int k = net.outputs;
matrix pred = make_matrix(test.X.rows, k);
float *X = calloc(net.batch*test.X.rows, sizeof(float));
for(i = 0; i < test.X.rows; i += net.batch){
@@ -533,7 +466,7 @@ matrix network_predict_data_multi(network net, data test, int n)
matrix network_predict_data(network net, data test)
{
int i,j,b;
int k = get_network_output_size(net);
int k = net.outputs;
matrix pred = make_matrix(test.X.rows, k);
float *X = calloc(net.batch*test.X.cols, sizeof(float));
for(i = 0; i < test.X.rows; i += net.batch){
@@ -613,6 +546,15 @@ float *network_accuracies(network net, data d, int n)
return acc;
}
layer get_network_output_layer(network net)
{
int i;
for(i = net.n - 1; i >= 0; --i){
if(net.layers[i].type != COST) break;
}
return net.layers[i];
}
float network_accuracy_multi(network net, data d, int n)
{
matrix guess = network_predict_data_multi(net, d, n);
@@ -628,10 +570,38 @@ void free_network(network net)
free_layer(net.layers[i]);
}
free(net.layers);
if(net.input) free(net.input);
if(net.truth) free(net.truth);
#ifdef GPU
if(*net.input_gpu) cuda_free(*net.input_gpu);
if(*net.truth_gpu) cuda_free(*net.truth_gpu);
if(net.input_gpu) free(net.input_gpu);
if(net.truth_gpu) free(net.truth_gpu);
if(net.input_gpu) cuda_free(net.input_gpu);
if(net.truth_gpu) cuda_free(net.truth_gpu);
#endif
}
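
A reading of the teardown above: input_gpu/truth_gpu used to be float** handles that needed free() on the host slot plus cuda_free() on the device block; after this commit they are plain device pointers owned by the struct, so cleanup is a single cuda_free each. The allocation counterpart (as in parse_network_cfg further down):

/* Persistent device mirrors, sized once from inputs/truths. */
net.input_gpu = cuda_make_array(net.input, net.inputs*net.batch);
net.truth_gpu = cuda_make_array(net.truth, net.truths*net.batch);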
// Some day...
layer network_output_layer(network net)
{
int i;
for(i = net.n - 1; i >= 0; --i){
if(net.layers[i].type != COST) break;
}
return net.layers[i];
}
int network_inputs(network net)
{
return net.layers[0].inputs;
}
int network_outputs(network net)
{
return network_output_layer(net).outputs;
}
float *network_output(network net)
{
return network_output_layer(net).output;
}

View File

@@ -12,7 +12,6 @@ typedef enum {
} learning_rate_policy;
typedef struct network{
float *workspace;
int n;
int batch;
int *seen;
@@ -21,7 +20,6 @@ typedef struct network{
float momentum;
float decay;
layer *layers;
int outputs;
float *output;
learning_rate_policy policy;
@@ -43,6 +41,8 @@ typedef struct network{
float eps;
int inputs;
int outputs;
int truths;
int notruth;
int h, w, c;
int max_crop;
@@ -56,32 +56,34 @@ typedef struct network{
int gpu_index;
tree *hierarchy;
#ifdef GPU
float **input_gpu;
float **truth_gpu;
#endif
} network;
typedef struct network_state {
float *truth;
float *input;
float *truth;
float *delta;
float *workspace;
int train;
int index;
network net;
} network_state;
float *cost;
#ifdef GPU
float *input_gpu;
float *truth_gpu;
float *delta_gpu;
float *output_gpu;
#endif
} network;
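
This struct change is the heart of the commit: network_state disappears and its fields move onto network itself. A summary of the mechanical rename repeated in every layer file below (drawn from the hunks, not new behavior):

/* old                      new (CPU)           new (GPU)          */
/* state.input         ->   net.input           net.input_gpu      */
/* state.truth         ->   net.truth           net.truth_gpu      */
/* state.delta         ->   net.delta           net.delta_gpu      */
/* state.train/.index  ->   net.train/net.index (shared)           */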
#ifdef GPU
float train_networks(network *nets, int n, data d, int interval);
void sync_nets(network *nets, int n, int interval);
float train_network_datum_gpu(network net, float *x, float *y);
float train_network_datum_gpu(network net);
float *network_predict_gpu(network net, float *input);
float * get_network_output_gpu_layer(network net, int i);
float * get_network_delta_gpu_layer(network net, int i);
float *get_network_output_gpu(network net);
void forward_network_gpu(network net, network_state state);
void backward_network_gpu(network net, network_state state);
void pull_network_output(network net);
void forward_network_gpu(network net);
void backward_network_gpu(network net);
void update_network_gpu(network net);
void harmless_update_network_gpu(network net);
#endif
@@ -93,14 +95,13 @@ void compare_networks(network n1, network n2, data d);
char *get_layer_string(LAYER_TYPE a);
network make_network(int n);
void forward_network(network net, network_state state);
void backward_network(network net, network_state state);
void forward_network(network net);
void backward_network(network net);
void update_network(network net);
float train_network(network net, data d);
float train_network_batch(network net, data d, int n);
float train_network_sgd(network net, data d, int n);
float train_network_datum(network net, float *x, float *y);
float train_network_datum(network net);
matrix network_predict_data(network net, data test);
float *network_predict(network net, float *input);
@@ -108,26 +109,17 @@ float network_accuracy(network net, data d);
float *network_accuracies(network net, data d, int n);
float network_accuracy_multi(network net, data d, int n);
void top_predictions(network net, int n, int *index);
float *get_network_output(network net);
float *get_network_output_layer(network net, int i);
float *get_network_delta_layer(network net, int i);
float *get_network_delta(network net);
int get_network_output_size_layer(network net, int i);
int get_network_output_size(network net);
image get_network_image(network net);
image get_network_image_layer(network net, int i);
layer get_network_output_layer(network net);
int get_predicted_class_network(network net);
void print_network(network net);
void visualize_network(network net);
int resize_network(network *net, int w, int h);
void set_batch_network(network *net, int b);
int get_network_input_size(network net);
float get_network_cost(network net);
network load_network(char *cfg, char *weights, int clear);
load_args get_base_args(network net);
int get_network_nuisance(network net);
int get_network_background(network net);
void calc_network_cost(network net);
#endif

View File

@@ -35,46 +35,45 @@ extern "C" {
#include "blas.h"
}
float * get_network_output_gpu_layer(network net, int i);
float * get_network_delta_gpu_layer(network net, int i);
float * get_network_output_gpu(network net);
void forward_network_gpu(network net, network_state state)
void forward_network_gpu(network net)
{
state.workspace = net.workspace;
int i;
for(i = 0; i < net.n; ++i){
state.index = i;
net.index = i;
layer l = net.layers[i];
if(l.delta_gpu){
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
}
//if(l.c ==3 && i > 5) state.input = *net.input_gpu;
l.forward_gpu(l, state);
state.input = l.output_gpu;
if(l.truth) state.truth = l.output_gpu;
l.forward_gpu(l, net);
net.input_gpu = l.output_gpu;
net.input = l.output;
if(l.truth) {
net.truth_gpu = l.output_gpu;
net.truth = l.output;
}
}
pull_network_output(net);
calc_network_cost(net);
}
void backward_network_gpu(network net, network_state state)
void backward_network_gpu(network net)
{
state.workspace = net.workspace;
int i;
float * original_input = state.input;
float * original_delta = state.delta;
network orig = net;
for(i = net.n-1; i >= 0; --i){
state.index = i;
layer l = net.layers[i];
if(l.stopbackward) break;
if(i == 0){
state.input = original_input;
state.delta = original_delta;
net = orig;
}else{
layer prev = net.layers[i-1];
state.input = prev.output_gpu;
state.delta = prev.delta_gpu;
net.input = prev.output;
net.delta = prev.delta;
net.input_gpu = prev.output_gpu;
net.delta_gpu = prev.delta_gpu;
}
l.backward_gpu(l, state);
net.index = i;
l.backward_gpu(l, net);
}
}
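
Because net is passed by value, rebinding its pointers inside the loop never leaks into the caller; the orig snapshot restores the original input/delta for layer 0. A minimal illustration of the pattern (assumes at least two layers):

network orig = net;                  /* caller-visible pointers        */
net.input_gpu = prev.output_gpu;     /* rebinding is local to the call */
net.delta_gpu = prev.delta_gpu;
/* ... l.backward_gpu(l, net) ... */
net = orig;                          /* layer 0 sees original buffers  */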
@@ -95,39 +94,30 @@ void update_network_gpu(network net)
void harmless_update_network_gpu(network net)
{
net.learning_rate = 0;
net.momentum = 1;
update_network_gpu(net);
}
void forward_backward_network_gpu(network net, float *x, float *y)
{
network_state state;
state.index = 0;
state.net = net;
int x_size = get_network_input_size(net)*net.batch;
int y_size = get_network_output_size(net)*net.batch;
if(net.layers[net.n-1].truths) y_size = net.layers[net.n-1].truths*net.batch;
if(!*net.input_gpu){
*net.input_gpu = cuda_make_array(x, x_size);
if(!net.notruth) *net.truth_gpu = cuda_make_array(y, y_size);
}else{
cuda_push_array(*net.input_gpu, x, x_size);
if(!net.notruth) cuda_push_array(*net.truth_gpu, y, y_size);
cuda_set_device(net.gpu_index);
int i;
for(i = 0; i < net.n; ++i){
layer l = net.layers[i];
if(l.weight_updates_gpu) fill_ongpu(l.nweights, 0, l.weight_updates_gpu, 1);
if(l.bias_updates_gpu) fill_ongpu(l.nbiases, 0, l.bias_updates_gpu, 1);
if(l.scale_updates_gpu) fill_ongpu(l.nbiases, 0, l.scale_updates_gpu, 1);
}
state.input = *net.input_gpu;
state.delta = 0;
state.truth = *net.truth_gpu;
state.train = 1;
forward_network_gpu(net, state);
backward_network_gpu(net, state);
}
float train_network_datum_gpu(network net, float *x, float *y)
float train_network_datum_gpu(network net)
{
*net.seen += net.batch;
forward_backward_network_gpu(net, x, y);
float error = get_network_cost(net);
int x_size = net.inputs*net.batch;
int y_size = net.truths*net.batch;
cuda_push_array(net.input_gpu, net.input, x_size);
cuda_push_array(net.truth_gpu, net.truth, y_size);
net.train = 1;
forward_network_gpu(net);
backward_network_gpu(net);
float error = *net.cost;
if (((*net.seen) / net.batch) % net.subdivisions == 0) update_network_gpu(net);
return error;
@@ -384,34 +374,19 @@ float train_networks(network *nets, int n, data d, int interval)
return (float)sum/(n);
}
float *get_network_output_layer_gpu(network net, int i)
void pull_network_output(network net)
{
layer l = net.layers[i];
if(l.type != REGION) cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
return l.output;
}
float *get_network_output_gpu(network net)
{
int i;
for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break;
return get_network_output_layer_gpu(net, i);
layer l = get_network_output_layer(net);
cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
}
float *network_predict_gpu(network net, float *input)
{
cuda_set_device(net.gpu_index);
int size = get_network_input_size(net) * net.batch;
network_state state;
state.index = 0;
state.net = net;
state.input = cuda_make_array(input, size);
state.truth = 0;
state.train = 0;
state.delta = 0;
forward_network_gpu(net, state);
float *out = get_network_output_gpu(net);
cuda_free(state.input);
return out;
cuda_push_array(net.input_gpu, input, net.inputs*net.batch);
net.truth = 0;
net.train = 0;
forward_network_gpu(net);
return net.output;
}
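
A per-call cost note on the function above (reading of the diff, not additional code):

/* Per prediction: one host->device push of net.inputs*net.batch floats
 * into the persistent net.input_gpu and one forward pass; the output is
 * already on the host because forward_network_gpu() ends with
 * pull_network_output(). The old path paid cuda_make_array + cuda_free
 * on every call. */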

View File

@@ -46,32 +46,29 @@ void optimize_picture(network *net, image orig, int max_layer, float scale, floa
image delta = make_image(im.w, im.h, im.c);
network_state state = {0};
state.net = *net;
#ifdef GPU
state.input = cuda_make_array(im.data, im.w*im.h*im.c);
state.delta = cuda_make_array(im.data, im.w*im.h*im.c);
net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c);
cuda_push_array(net->input_gpu, im.data, net->inputs);
forward_network_gpu(*net, state);
forward_network_gpu(*net);
copy_ongpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1);
cuda_pull_array(last.delta_gpu, last.delta, last.outputs);
calculate_loss(last.delta, last.delta, last.outputs, thresh);
cuda_push_array(last.delta_gpu, last.delta, last.outputs);
backward_network_gpu(*net, state);
backward_network_gpu(*net);
cuda_pull_array(state.delta, delta.data, im.w*im.h*im.c);
cuda_free(state.input);
cuda_free(state.delta);
cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c);
cuda_free(net->delta_gpu);
net->delta_gpu = 0;
#else
state.input = im.data;
state.delta = delta.data;
forward_network(*net, state);
net->input = im.data;
net->delta = delta.data;
forward_network(*net);
copy_cpu(last.outputs, last.output, 1, last.delta, 1);
calculate_loss(last.output, last.delta, last.outputs, thresh);
backward_network(*net, state);
backward_network(*net);
#endif
if(flip) flip_image(delta);
@@ -134,31 +131,30 @@ void smooth(image recon, image update, float lambda, int num)
void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters)
{
int iter = 0;
layer l = get_network_output_layer(net);
for (iter = 0; iter < iters; ++iter) {
image delta = make_image(recon.w, recon.h, recon.c);
network_state state = {0};
state.net = net;
#ifdef GPU
state.input = cuda_make_array(recon.data, recon.w*recon.h*recon.c);
state.delta = cuda_make_array(delta.data, delta.w*delta.h*delta.c);
state.truth = cuda_make_array(features, get_network_output_size(net));
cuda_push_array(net.input_gpu, recon.data, recon.w*recon.h*recon.c);
cuda_push_array(net.truth_gpu, features, net.truths);
net.delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c);
forward_network_gpu(net, state);
backward_network_gpu(net, state);
forward_network_gpu(net);
copy_ongpu(l.outputs, net.truth_gpu, 1, l.delta_gpu, 1);
axpy_ongpu(l.outputs, -1, l.output_gpu, 1, l.delta_gpu, 1);
backward_network_gpu(net);
cuda_pull_array(state.delta, delta.data, delta.w*delta.h*delta.c);
cuda_pull_array(net.delta_gpu, delta.data, delta.w*delta.h*delta.c);
cuda_free(state.input);
cuda_free(state.delta);
cuda_free(state.truth);
cuda_free(net.delta_gpu);
#else
state.input = recon.data;
state.delta = delta.data;
state.truth = features;
net.input = recon.data;
net.delta = delta.data;
net.truth = features;
forward_network(net, state);
backward_network(net, state);
forward_network(net);
backward_network(net);
#endif
axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1);
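
For reference, the copy/axpy pair in the GPU branch above builds the reconstruction error signal explicitly on the device (a restatement of the code, not new behavior):

/* copy_ongpu: l.delta <- net.truth
 * axpy_ongpu: l.delta <- l.delta - 1*l.output
 * so l.delta = truth - output, the residual backpropagated through
 * the network below. */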
@@ -328,11 +324,12 @@ void run_nightmare(int argc, char **argv)
free_image(im);
im = resized;
}
im = letterbox_image(im, net.w, net.h);
//im = letterbox_image(im, net.w, net.h);
float *features = 0;
image update;
if (reconstruct){
net.n = max_layer;
resize_network(&net, im.w, im.h);
int zz = 0;

View File

@@ -62,7 +62,7 @@ void resize_normalization_layer(layer *layer, int w, int h)
#endif
}
void forward_normalization_layer(const layer layer, network_state state)
void forward_normalization_layer(const layer layer, network net)
{
int k,b;
int w = layer.w;
@@ -73,7 +73,7 @@ void forward_normalization_layer(const layer layer, network_state state)
for(b = 0; b < layer.batch; ++b){
float *squared = layer.squared + w*h*c*b;
float *norms = layer.norms + w*h*c*b;
float *input = state.input + w*h*c*b;
float *input = net.input + w*h*c*b;
pow_cpu(w*h*c, 2, input, 1, squared, 1);
const_cpu(w*h, layer.kappa, norms, 1);
@@ -90,10 +90,10 @@ void forward_normalization_layer(const layer layer, network_state state)
}
}
pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
mul_cpu(w*h*c*layer.batch, net.input, 1, layer.output, 1);
}
void backward_normalization_layer(const layer layer, network_state state)
void backward_normalization_layer(const layer layer, network net)
{
// TODO This is approximate ;-)
// Also this should add into delta instead of overwriting.
@@ -101,12 +101,12 @@ void backward_normalization_layer(const layer layer, network_state state)
int w = layer.w;
int h = layer.h;
int c = layer.c;
pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, net.delta, 1);
mul_cpu(w*h*c*layer.batch, layer.delta, 1, net.delta, 1);
}
#ifdef GPU
void forward_normalization_layer_gpu(const layer layer, network_state state)
void forward_normalization_layer_gpu(const layer layer, network net)
{
int k,b;
int w = layer.w;
@@ -117,7 +117,7 @@ void forward_normalization_layer_gpu(const layer layer, network_state state)
for(b = 0; b < layer.batch; ++b){
float *squared = layer.squared_gpu + w*h*c*b;
float *norms = layer.norms_gpu + w*h*c*b;
float *input = state.input + w*h*c*b;
float *input = net.input_gpu + w*h*c*b;
pow_ongpu(w*h*c, 2, input, 1, squared, 1);
const_ongpu(w*h, layer.kappa, norms, 1);
@@ -134,17 +134,17 @@ void forward_normalization_layer_gpu(const layer layer, network_state state)
}
}
pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
mul_ongpu(w*h*c*layer.batch, net.input_gpu, 1, layer.output_gpu, 1);
}
void backward_normalization_layer_gpu(const layer layer, network_state state)
void backward_normalization_layer_gpu(const layer layer, network net)
{
// TODO This is approximate ;-)
int w = layer.w;
int h = layer.h;
int c = layer.c;
pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, net.delta_gpu, 1);
mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1);
}
#endif

View File

@@ -7,13 +7,13 @@
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
void resize_normalization_layer(layer *layer, int h, int w);
void forward_normalization_layer(const layer layer, network_state state);
void backward_normalization_layer(const layer layer, network_state state);
void forward_normalization_layer(const layer layer, network net);
void backward_normalization_layer(const layer layer, network net);
void visualize_normalization_layer(layer layer, char *window);
#ifdef GPU
void forward_normalization_layer_gpu(const layer layer, network_state state);
void backward_normalization_layer_gpu(const layer layer, network_state state);
void forward_normalization_layer_gpu(const layer layer, network net);
void backward_normalization_layer_gpu(const layer layer, network net);
#endif
#endif

View File

@@ -154,8 +154,11 @@ layer parse_deconvolutional(list *options, size_params params)
batch=params.batch;
if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
int pad = option_find_int_quiet(options, "pad",0);
int padding = option_find_int_quiet(options, "padding",0);
if(pad) padding = size/2;
layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,activation, batch_normalize);
layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net.adam);
return l;
}
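
The new pad/padding handling matches the convention used elsewhere in the parser: an explicit pad=1 wins and derives padding = size/2; otherwise an explicit padding=N is respected, else zero. Worked values for a hypothetical [deconvolutional] section:

/* pad=1, size=4      -> padding = 2 ("same"-style)
 * pad=0, padding=3   -> padding = 3 (explicit value respected)
 * neither set        -> padding = 0                             */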
@@ -546,7 +549,7 @@ void parse_net_options(list *options, network *net)
if(net->adam){
net->B1 = option_find_float(options, "B1", .9);
net->B2 = option_find_float(options, "B2", .999);
net->eps = option_find_float(options, "eps", .000001);
net->eps = option_find_float(options, "eps", .00000001);
}
net->h = option_find_int_quiet(options, "height",0);
@@ -718,8 +721,18 @@ network parse_network_cfg(char *filename)
}
}
free_list(sections);
net.outputs = get_network_output_size(net);
net.output = get_network_output(net);
layer out = get_network_output_layer(net);
net.outputs = out.outputs;
net.truths = out.outputs;
if(net.layers[net.n-1].truths) net.truths = net.layers[net.n-1].truths;
net.output = out.output;
net.input = calloc(net.inputs*net.batch, sizeof(float));
net.truth = calloc(net.truths*net.batch, sizeof(float));
#ifdef GPU
net.output_gpu = out.output_gpu;
net.input_gpu = cuda_make_array(net.input, net.inputs*net.batch);
net.truth_gpu = cuda_make_array(net.truth, net.truths*net.batch);
#endif
if(workspace_size){
//printf("%ld\n", workspace_size);
#ifdef GPU

View File

@@ -142,10 +142,10 @@ int entry_index(layer l, int batch, int location, int entry)
}
void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output);
void forward_region_layer(const layer l, network_state state)
void forward_region_layer(const layer l, network net)
{
int i,j,b,t,n;
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
#ifndef GPU
for (b = 0; b < l.batch; ++b){
@@ -161,17 +161,17 @@ void forward_region_layer(const layer l, network_state state)
int count = 5;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_cpu(state.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count);
softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count);
count += group_size;
}
} else if (l.softmax){
int index = entry_index(l, 0, 0, 5);
softmax_cpu(state.input + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index);
softmax_cpu(net.input + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index);
}
#endif
memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
if(!state.train) return;
if(!net.train) return;
float avg_iou = 0;
float recall = 0;
float avg_cat = 0;
@@ -184,9 +184,9 @@ void forward_region_layer(const layer l, network_state state)
if(l.softmax_tree){
int onlyclass = 0;
for(t = 0; t < 30; ++t){
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
box truth = float_to_box(net.truth + t*5 + b*l.truths, 1);
if(!truth.x) break;
int class = state.truth[t*5 + b*l.truths + 4];
int class = net.truth[t*5 + b*l.truths + 4];
float maxp = 0;
int maxi = 0;
if(truth.x > 100000 && truth.y > 100000){
@@ -220,7 +220,7 @@ void forward_region_layer(const layer l, network_state state)
box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
float best_iou = 0;
for(t = 0; t < 30; ++t){
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
box truth = float_to_box(net.truth + t*5 + b*l.truths, 1);
if(!truth.x) break;
float iou = box_iou(pred, truth);
if (iou > best_iou) {
@@ -234,7 +234,7 @@ void forward_region_layer(const layer l, network_state state)
l.delta[obj_index] = 0;
}
if(*(state.net.seen) < 12800){
if(*(net.seen) < 12800){
box truth = {0};
truth.x = (i + .5)/l.w;
truth.y = (j + .5)/l.h;
@@ -246,7 +246,7 @@ void forward_region_layer(const layer l, network_state state)
}
}
for(t = 0; t < 30; ++t){
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
box truth = float_to_box(net.truth + t*5 + b*l.truths, 1);
if(!truth.x) break;
float best_iou = 0;
@@ -289,7 +289,7 @@ void forward_region_layer(const layer l, network_state state)
l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]);
}
int class = state.truth[t*5 + b*l.truths + 4];
int class = net.truth[t*5 + b*l.truths + 4];
if (l.map) class = l.map[class];
int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 5);
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
@@ -302,7 +302,7 @@ void forward_region_layer(const layer l, network_state state)
printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
}
void backward_region_layer(const layer l, network_state state)
void backward_region_layer(const layer l, network net)
{
/*
int b;
@@ -311,11 +311,11 @@ void backward_region_layer(const layer l, network_state state)
int index = (b*size + 4)*l.w*l.h;
gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index);
}
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1);
*/
}
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh)
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh, int nomult)
{
int i,j,n,z;
float *predictions = l.output;
@@ -358,10 +358,12 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
boxes[index].w *= (float)max/w;
boxes[index].h *= (float)max/h;
}
boxes[index].x *= w;
boxes[index].y *= h;
boxes[index].w *= w;
boxes[index].h *= h;
if(!nomult){
boxes[index].x *= w;
boxes[index].y *= h;
boxes[index].w *= w;
boxes[index].h *= h;
}
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5);
if(l.softmax_tree){
@@ -383,6 +385,9 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5 + j);
float prob = scale*predictions[class_index];
probs[index][j] = (prob > thresh) ? prob : 0;
// TODO REMOVE
// if (j != 15 && j != 16) probs[index][j] = 0;
// if (j != 0) probs[index][j] = 0;
}
}
if(only_objectness){
@@ -394,9 +399,9 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
#ifdef GPU
void forward_region_layer_gpu(const layer l, network_state state)
void forward_region_layer_gpu(const layer l, network net)
{
copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
copy_ongpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1);
int b, n;
for (b = 0; b < l.batch; ++b){
for(n = 0; n < l.n; ++n){
@@ -412,40 +417,33 @@ void forward_region_layer_gpu(const layer l, network_state state)
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
int index = entry_index(l, 0, 0, count);
softmax_gpu(state.input + index, group_size, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
softmax_gpu(net.input_gpu + index, group_size, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
count += group_size;
}
} else if (l.softmax) {
int index = entry_index(l, 0, 0, 5);
//printf("%d\n", index);
softmax_gpu(state.input + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
softmax_gpu(net.input_gpu + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
}
if(!state.train || l.onlyforward){
if(!net.train || l.onlyforward){
cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
return;
}
float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
float *truth_cpu = 0;
if(state.truth){
if(net.truth_gpu){
int num_truth = l.batch*l.truths;
truth_cpu = calloc(num_truth, sizeof(float));
cuda_pull_array(state.truth, truth_cpu, num_truth);
cuda_pull_array(net.truth_gpu, truth_cpu, num_truth);
}
cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs);
network_state cpu_state = state;
cpu_state.train = state.train;
cpu_state.truth = truth_cpu;
cpu_state.input = in_cpu;
forward_region_layer(l, cpu_state);
cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs);
forward_region_layer(l, net);
//cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
free(cpu_state.input);
if(!state.train) return;
if(!net.train) return;
cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
if(cpu_state.truth) free(cpu_state.truth);
}
void backward_region_layer_gpu(const layer l, network_state state)
void backward_region_layer_gpu(const layer l, network net)
{
int b, n;
for (b = 0; b < l.batch; ++b){
@@ -456,7 +454,7 @@ void backward_region_layer_gpu(const layer l, network_state state)
gradient_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index);
}
}
axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1);
axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
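
The widened get_region_boxes() signature gains a trailing nomult flag: when nonzero the boxes stay normalized to [0,1] instead of being scaled by the supplied w,h. A hypothetical caller (im, thresh, probs, boxes, hier_thresh are assumed caller-side names):

/* Last argument 0 keeps the old pixel-scaled behavior;
 * pass 1 to receive normalized coordinates instead. */
get_region_boxes(l, im.w, im.h, thresh, probs, boxes, 0, 0, hier_thresh, 0);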

View File

@@ -5,14 +5,14 @@
#include "network.h"
layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
void forward_region_layer(const layer l, network_state state);
void backward_region_layer(const layer l, network_state state);
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh);
void forward_region_layer(const layer l, network net);
void backward_region_layer(const layer l, network net);
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh, int nomult);
void resize_region_layer(layer *l, int w, int h);
#ifdef GPU
void forward_region_layer_gpu(const layer l, network_state state);
void backward_region_layer_gpu(layer l, network_state state);
void forward_region_layer_gpu(const layer l, network net);
void backward_region_layer_gpu(layer l, network net);
#endif
#endif

View File

@@ -87,11 +87,11 @@ void resize_reorg_layer(layer *l, int w, int h)
#endif
}
void forward_reorg_layer(const layer l, network_state state)
void forward_reorg_layer(const layer l, network net)
{
int i;
if(l.flatten){
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
if(l.reverse){
flatten(l.output, l.w*l.h, l.c, l.batch, 0);
}else{
@@ -99,74 +99,74 @@ void forward_reorg_layer(const layer l, network_state state)
}
} else if (l.extra) {
for(i = 0; i < l.batch; ++i){
copy_cpu(l.inputs, state.input + i*l.inputs, 1, l.output + i*l.outputs, 1);
copy_cpu(l.inputs, net.input + i*l.inputs, 1, l.output + i*l.outputs, 1);
}
} else if (l.reverse){
reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output);
reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output);
} else {
reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output);
reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output);
}
}
void backward_reorg_layer(const layer l, network_state state)
void backward_reorg_layer(const layer l, network net)
{
int i;
if(l.flatten){
memcpy(state.delta, l.delta, l.outputs*l.batch*sizeof(float));
memcpy(net.delta, l.delta, l.outputs*l.batch*sizeof(float));
if(l.reverse){
flatten(state.delta, l.w*l.h, l.c, l.batch, 1);
flatten(net.delta, l.w*l.h, l.c, l.batch, 1);
}else{
flatten(state.delta, l.w*l.h, l.c, l.batch, 0);
flatten(net.delta, l.w*l.h, l.c, l.batch, 0);
}
} else if(l.reverse){
reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta);
reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta);
} else if (l.extra) {
for(i = 0; i < l.batch; ++i){
copy_cpu(l.inputs, l.delta + i*l.outputs, 1, state.delta + i*l.inputs, 1);
copy_cpu(l.inputs, l.delta + i*l.outputs, 1, net.delta + i*l.inputs, 1);
}
}else{
reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta);
reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta);
}
}
#ifdef GPU
void forward_reorg_layer_gpu(layer l, network_state state)
void forward_reorg_layer_gpu(layer l, network net)
{
int i;
if(l.flatten){
if(l.reverse){
flatten_ongpu(state.input, l.w*l.h, l.c, l.batch, 0, l.output_gpu);
flatten_ongpu(net.input_gpu, l.w*l.h, l.c, l.batch, 0, l.output_gpu);
}else{
flatten_ongpu(state.input, l.w*l.h, l.c, l.batch, 1, l.output_gpu);
flatten_ongpu(net.input_gpu, l.w*l.h, l.c, l.batch, 1, l.output_gpu);
}
} else if (l.extra) {
for(i = 0; i < l.batch; ++i){
copy_ongpu(l.inputs, state.input + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1);
copy_ongpu(l.inputs, net.input_gpu + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1);
}
} else if (l.reverse) {
reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu);
reorg_ongpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu);
}else {
reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu);
reorg_ongpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu);
}
}
void backward_reorg_layer_gpu(layer l, network_state state)
void backward_reorg_layer_gpu(layer l, network net)
{
if(l.flatten){
if(l.reverse){
flatten_ongpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, state.delta);
flatten_ongpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, net.delta_gpu);
}else{
flatten_ongpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, state.delta);
flatten_ongpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, net.delta_gpu);
}
} else if (l.extra) {
int i;
for(i = 0; i < l.batch; ++i){
copy_ongpu(l.inputs, l.delta_gpu + i*l.outputs, 1, state.delta + i*l.inputs, 1);
copy_ongpu(l.inputs, l.delta_gpu + i*l.outputs, 1, net.delta_gpu + i*l.inputs, 1);
}
} else if(l.reverse){
reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta);
reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu);
} else {
reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta);
reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu);
}
}
#endif

View File

@@ -8,12 +8,12 @@
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra);
void resize_reorg_layer(layer *l, int w, int h);
void forward_reorg_layer(const layer l, network_state state);
void backward_reorg_layer(const layer l, network_state state);
void forward_reorg_layer(const layer l, network net);
void backward_reorg_layer(const layer l, network net);
#ifdef GPU
void forward_reorg_layer_gpu(layer l, network_state state);
void backward_reorg_layer_gpu(layer l, network_state state);
void forward_reorg_layer_gpu(layer l, network net);
void backward_reorg_layer_gpu(layer l, network net);
#endif
#endif

View File

@@ -151,7 +151,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
load_weights(&net, weightfile);
}
int inputs = get_network_input_size(net);
int inputs = net.inputs;
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int batch = net.batch;
int steps = net.time_steps;
@@ -176,7 +176,9 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
p = get_rnn_data(text, offsets, inputs, size, streams, steps);
}
float loss = train_network_datum(net, p.x, p.y) / (batch);
memcpy(net.input, p.x, net.inputs*net.batch*sizeof(float));
memcpy(net.truth, p.y, net.truths*net.batch*sizeof(float));
float loss = train_network_datum(net) / (batch);
free(p.x);
free(p.y);
if (avg_loss < 0) avg_loss = loss;
@@ -234,7 +236,7 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
if(weightfile){
load_weights(&net, weightfile);
}
int inputs = get_network_input_size(net);
int inputs = net.inputs;
int i, j;
for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
@@ -291,7 +293,7 @@ void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int r
if(weightfile){
load_weights(&net, weightfile);
}
int inputs = get_network_input_size(net);
int inputs = net.inputs;
int i, j;
for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
@@ -329,7 +331,7 @@ void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed)
if(weightfile){
load_weights(&net, weightfile);
}
int inputs = get_network_input_size(net);
int inputs = net.inputs;
int count = 0;
int words = 1;
@@ -381,7 +383,7 @@ void valid_char_rnn(char *cfgfile, char *weightfile, char *seed)
if(weightfile){
load_weights(&net, weightfile);
}
int inputs = get_network_input_size(net);
int inputs = net.inputs;
int count = 0;
int words = 1;
@@ -422,7 +424,7 @@ void vec_char_rnn(char *cfgfile, char *weightfile, char *seed)
if(weightfile){
load_weights(&net, weightfile);
}
int inputs = get_network_input_size(net);
int inputs = net.inputs;
int c;
int seed_len = strlen(seed);

View File

@@ -80,10 +80,10 @@ void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, f
update_connected_layer(*(l.output_layer), batch, learning_rate, momentum, decay);
}
void forward_rnn_layer(layer l, network_state state)
void forward_rnn_layer(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_layer = *(l.input_layer);
layer self_layer = *(l.self_layer);
@@ -92,17 +92,17 @@ void forward_rnn_layer(layer l, network_state state)
fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
if(state.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
for (i = 0; i < l.steps; ++i) {
s.input = state.input;
s.input = net.input;
forward_connected_layer(input_layer, s);
s.input = l.state;
forward_connected_layer(self_layer, s);
float *old_state = l.state;
if(state.train) l.state += l.hidden*l.batch;
if(net.train) l.state += l.hidden*l.batch;
if(l.shortcut){
copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1);
}else{
@@ -114,17 +114,17 @@ void forward_rnn_layer(layer l, network_state state)
s.input = l.state;
forward_connected_layer(output_layer, s);
state.input += l.inputs*l.batch;
net.input += l.inputs*l.batch;
increment_layer(&input_layer, 1);
increment_layer(&self_layer, 1);
increment_layer(&output_layer, 1);
}
}
void backward_rnn_layer(layer l, network_state state)
void backward_rnn_layer(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_layer = *(l.input_layer);
layer self_layer = *(l.self_layer);
@@ -160,8 +160,8 @@ void backward_rnn_layer(layer l, network_state state)
copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1);
if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1);
s.input = state.input + i*l.inputs*l.batch;
if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
s.input = net.input + i*l.inputs*l.batch;
if(net.delta) s.delta = net.delta + i*l.inputs*l.batch;
else s.delta = 0;
backward_connected_layer(input_layer, s);
@@ -194,10 +194,10 @@ void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentu
update_connected_layer_gpu(*(l.output_layer), batch, learning_rate, momentum, decay);
}
void forward_rnn_layer_gpu(layer l, network_state state)
void forward_rnn_layer_gpu(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_layer = *(l.input_layer);
layer self_layer = *(l.self_layer);
@@ -206,17 +206,17 @@ void forward_rnn_layer_gpu(layer l, network_state state)
fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1);
fill_ongpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1);
fill_ongpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1);
if(state.train) fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
if(net.train) fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
for (i = 0; i < l.steps; ++i) {
s.input = state.input;
s.input_gpu = net.input_gpu;
forward_connected_layer_gpu(input_layer, s);
s.input = l.state_gpu;
s.input_gpu = l.state_gpu;
forward_connected_layer_gpu(self_layer, s);
float *old_state = l.state_gpu;
if(state.train) l.state_gpu += l.hidden*l.batch;
if(net.train) l.state_gpu += l.hidden*l.batch;
if(l.shortcut){
copy_ongpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1);
}else{
@@ -225,20 +225,20 @@ void forward_rnn_layer_gpu(layer l, network_state state)
axpy_ongpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1);
axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
s.input = l.state_gpu;
s.input_gpu = l.state_gpu;
forward_connected_layer_gpu(output_layer, s);
state.input += l.inputs*l.batch;
net.input_gpu += l.inputs*l.batch;
increment_layer(&input_layer, 1);
increment_layer(&self_layer, 1);
increment_layer(&output_layer, 1);
}
}
void backward_rnn_layer_gpu(layer l, network_state state)
void backward_rnn_layer_gpu(layer l, network net)
{
network_state s = {0};
s.train = state.train;
network s = net;
s.train = net.train;
int i;
layer input_layer = *(l.input_layer);
layer self_layer = *(l.self_layer);
@@ -249,24 +249,24 @@ void backward_rnn_layer_gpu(layer l, network_state state)
l.state_gpu += l.hidden*l.batch*l.steps;
for (i = l.steps-1; i >= 0; --i) {
s.input = l.state_gpu;
s.delta = self_layer.delta_gpu;
s.input_gpu = l.state_gpu;
s.delta_gpu = self_layer.delta_gpu;
backward_connected_layer_gpu(output_layer, s);
l.state_gpu -= l.hidden*l.batch;
copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
s.input = l.state_gpu;
s.delta = self_layer.delta_gpu - l.hidden*l.batch;
if (i == 0) s.delta = 0;
s.input_gpu = l.state_gpu;
s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch;
if (i == 0) s.delta_gpu = 0;
backward_connected_layer_gpu(self_layer, s);
//copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
s.input = state.input + i*l.inputs*l.batch;
if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
else s.delta = 0;
s.input_gpu = net.input_gpu + i*l.inputs*l.batch;
if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch;
else s.delta_gpu = 0;
backward_connected_layer_gpu(input_layer, s);
increment_layer(&input_layer, -1);

View File

@@ -9,13 +9,13 @@
layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);
void forward_rnn_layer(layer l, network_state state);
void backward_rnn_layer(layer l, network_state state);
void forward_rnn_layer(layer l, network net);
void backward_rnn_layer(layer l, network net);
void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay);
#ifdef GPU
void forward_rnn_layer_gpu(layer l, network_state state);
void backward_rnn_layer_gpu(layer l, network_state state);
void forward_rnn_layer_gpu(layer l, network net);
void backward_rnn_layer_gpu(layer l, network net);
void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_rnn_layer(layer l);
void pull_rnn_layer(layer l);

View File

@@ -103,7 +103,9 @@ void train_vid_rnn(char *cfgfile, char *weightfile)
time=clock();
float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps);
float loss = train_network_datum(net, p.x, p.y) / (net.batch);
memcpy(net.input, p.x, net.inputs*net.batch*sizeof(float));
memcpy(net.truth, p.y, net.truths*net.batch*sizeof(float));
float loss = train_network_datum(net) / (net.batch);
free(p.x);

View File

@@ -70,13 +70,13 @@ void resize_route_layer(route_layer *l, network *net)
}
void forward_route_layer(const route_layer l, network_state state)
void forward_route_layer(const route_layer l, network net)
{
int i, j;
int offset = 0;
for(i = 0; i < l.n; ++i){
int index = l.input_layers[i];
float *input = state.net.layers[index].output;
float *input = net.layers[index].output;
int input_size = l.input_sizes[i];
for(j = 0; j < l.batch; ++j){
copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1);
@@ -85,13 +85,13 @@ void forward_route_layer(const route_layer l, network_state state)
}
}
void backward_route_layer(const route_layer l, network_state state)
void backward_route_layer(const route_layer l, network net)
{
int i, j;
int offset = 0;
for(i = 0; i < l.n; ++i){
int index = l.input_layers[i];
float *delta = state.net.layers[index].delta;
float *delta = net.layers[index].delta;
int input_size = l.input_sizes[i];
for(j = 0; j < l.batch; ++j){
axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1);
@@ -101,13 +101,13 @@ void backward_route_layer(const route_layer l, network_state state)
}
#ifdef GPU
void forward_route_layer_gpu(const route_layer l, network_state state)
void forward_route_layer_gpu(const route_layer l, network net)
{
int i, j;
int offset = 0;
for(i = 0; i < l.n; ++i){
int index = l.input_layers[i];
float *input = state.net.layers[index].output_gpu;
float *input = net.layers[index].output_gpu;
int input_size = l.input_sizes[i];
for(j = 0; j < l.batch; ++j){
copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1);
@@ -116,13 +116,13 @@ void forward_route_layer_gpu(const route_layer l, network_state state)
}
}
void backward_route_layer_gpu(const route_layer l, network_state state)
void backward_route_layer_gpu(const route_layer l, network net)
{
int i, j;
int offset = 0;
for(i = 0; i < l.n; ++i){
int index = l.input_layers[i];
float *delta = state.net.layers[index].delta_gpu;
float *delta = net.layers[index].delta_gpu;
int input_size = l.input_sizes[i];
for(j = 0; j < l.batch; ++j){
axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1);

View File

@@ -6,13 +6,13 @@
typedef layer route_layer;
route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size);
void forward_route_layer(const route_layer l, network_state state);
void backward_route_layer(const route_layer l, network_state state);
void forward_route_layer(const route_layer l, network net);
void backward_route_layer(const route_layer l, network net);
void resize_route_layer(route_layer *l, network *net);
#ifdef GPU
void forward_route_layer_gpu(const route_layer l, network_state state);
void backward_route_layer_gpu(const route_layer l, network_state state);
void forward_route_layer_gpu(const route_layer l, network net);
void backward_route_layer_gpu(const route_layer l, network net);
#endif
#endif

View File

@@ -36,32 +36,32 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
return l;
}
void forward_shortcut_layer(const layer l, network_state state)
void forward_shortcut_layer(const layer l, network net)
{
copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output);
copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output);
activate_array(l.output, l.outputs*l.batch, l.activation);
}
void backward_shortcut_layer(const layer l, network_state state)
void backward_shortcut_layer(const layer l, network net)
{
gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1);
shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta);
axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, net.delta, 1);
shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, net.layers[l.index].delta);
}
#ifdef GPU
void forward_shortcut_layer_gpu(const layer l, network_state state)
void forward_shortcut_layer_gpu(const layer l, network net)
{
copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1);
shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu);
copy_ongpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu);
activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
void backward_shortcut_layer_gpu(const layer l, network_state state)
void backward_shortcut_layer_gpu(const layer l, network net)
{
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1);
shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu);
axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, net.delta_gpu, 1);
shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, net.layers[l.index].delta_gpu);
}
#endif

View File

@@ -5,12 +5,12 @@
#include "network.h"
layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2);
void forward_shortcut_layer(const layer l, network_state state);
void backward_shortcut_layer(const layer l, network_state state);
void forward_shortcut_layer(const layer l, network net);
void backward_shortcut_layer(const layer l, network net);
#ifdef GPU
void forward_shortcut_layer_gpu(const layer l, network_state state);
void backward_shortcut_layer_gpu(const layer l, network_state state);
void forward_shortcut_layer_gpu(const layer l, network net);
void backward_shortcut_layer_gpu(const layer l, network net);
#endif
#endif

View File

@@ -32,24 +32,24 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups)
return l;
}
void forward_softmax_layer(const softmax_layer l, network_state state)
void forward_softmax_layer(const softmax_layer l, network net)
{
if(l.softmax_tree){
int i;
int count = 0;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_cpu(state.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count);
softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count);
count += group_size;
}
} else {
softmax_cpu(state.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output);
softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output);
}
}
void backward_softmax_layer(const softmax_layer l, network_state state)
void backward_softmax_layer(const softmax_layer l, network net)
{
axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, state.delta, 1);
axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1);
}
#ifdef GPU
@@ -59,24 +59,24 @@ void pull_softmax_layer_output(const softmax_layer layer)
cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch);
}
void forward_softmax_layer_gpu(const softmax_layer l, network_state state)
void forward_softmax_layer_gpu(const softmax_layer l, network net)
{
if(l.softmax_tree){
int i;
int count = 0;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_gpu(state.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count);
softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count);
count += group_size;
}
} else {
softmax_gpu(state.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
}
}
void backward_softmax_layer_gpu(const softmax_layer layer, network_state state)
void backward_softmax_layer_gpu(const softmax_layer layer, network net)
{
axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, state.delta, 1);
axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1);
}
#endif

View File

@@ -7,13 +7,13 @@ typedef layer softmax_layer;
void softmax_array(float *input, int n, float temp, float *output);
softmax_layer make_softmax_layer(int batch, int inputs, int groups);
void forward_softmax_layer(const softmax_layer l, network_state state);
void backward_softmax_layer(const softmax_layer l, network_state state);
void forward_softmax_layer(const softmax_layer l, network net);
void backward_softmax_layer(const softmax_layer l, network net);
#ifdef GPU
void pull_softmax_layer_output(const softmax_layer l);
void forward_softmax_layer_gpu(const softmax_layer l, network_state state);
void backward_softmax_layer_gpu(const softmax_layer l, network_state state);
void forward_softmax_layer_gpu(const softmax_layer l, network net);
void backward_softmax_layer_gpu(const softmax_layer l, network net);
#endif
#endif