:eyeofthetiger::noseofthetiger::eyeofthetiger2:

This commit is contained in:
Joseph Redmon 2017-03-26 23:42:30 -07:00
parent b61bcf544e
commit 60e952ba69
53 changed files with 3486 additions and 661 deletions

View File

@ -10,7 +10,7 @@ ARCH= -gencode arch=compute_20,code=[sm_20,sm_21] \
-gencode arch=compute_52,code=[sm_52,compute_52]
# This is what I use, uncomment if you know your arch and want to specify
# ARCH= -gencode arch=compute_52,code=compute_52
ARCH= -gencode arch=compute_52,code=compute_52
VPATH=./src/
EXEC=darknet
@ -48,10 +48,10 @@ CFLAGS+= -DCUDNN
LDFLAGS+= -lcudnn
endif
OBJ=gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o tree.o
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o regressor.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o lsd.o super.o voxel.o tree.o
ifeq ($(GPU), 1)
LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
endif
OBJS = $(addprefix $(OBJDIR), $(OBJ))

View File

@ -1,7 +1,7 @@
classes= 80
train = /home/pjreddie/data/coco/trainvalno5k.txt
#valid = coco_testdev
valid = data/coco_val_5k.list
valid = coco_testdev
#valid = data/coco_val_5k.list
names = data/coco.names
backup = /home/pjreddie/backup/
eval=coco

135
cfg/go.cfg Normal file
View File

@ -0,0 +1,135 @@
[net]
batch=512
subdivisions=1
height=19
width=19
channels=1
momentum=0.9
decay=0.0005
burn_in=1000
learning_rate=0.1
policy=poly
power=4
max_batches=10000000
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=1
size=1
stride=1
pad=1
activation=linear
[reorg]
extra=1
stride=1
[softmax]
[cost]
type=sse

View File

@ -7,13 +7,13 @@ channels=1
momentum=0.9
decay=0.0005
learning_rate=0.1
learning_rate=0.01
policy=poly
power=4
max_batches=400000
max_batches=100000
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -21,7 +21,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -29,7 +29,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -37,7 +37,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -45,7 +45,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -53,7 +53,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -61,7 +61,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -69,7 +69,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -77,7 +77,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -85,7 +85,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -93,7 +93,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -101,7 +101,7 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
@ -109,14 +109,13 @@ activation=relu
batch_normalize=1
[convolutional]
filters=192
filters=256
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=1
size=1
@ -124,6 +123,10 @@ stride=1
pad=1
activation=linear
[reorg]
extra=1
stride=1
[softmax]
[cost]

View File

@ -12,7 +12,7 @@ exposure = 1.5
hue=.1
learning_rate=0.001
max_batches = 40100
max_batches = 40200
policy=steps
steps=-1,100,20000,30000
scales=.1,10,.1,.1

244
cfg/yolo-voc.2.0.cfg Normal file
View File

@ -0,0 +1,244 @@
[net]
batch=64
subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.0001
max_batches = 45000
policy=steps
steps=100,25000,35000
scales=10,.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[route]
layers=-9
[reorg]
stride=2
[route]
layers=-1,-3
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=125
activation=linear
[region]
anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
bias_match=1
classes=20
coords=4
num=5
softmax=1
jitter=.2
rescore=1
object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1
absolute=1
thresh = .6
random=0

View File

@ -11,11 +11,12 @@ saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.0001
max_batches = 45000
learning_rate=0.001
burn_in=1000
max_batches = 80200
policy=steps
steps=100,25000,35000
scales=10,.1,.1
steps=40000,60000
scales=.1,.1
[convolutional]
batch_normalize=1
@ -203,11 +204,19 @@ activation=leaky
[route]
layers=-9
[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky
[reorg]
stride=2
[route]
layers=-1,-3
layers=-1,-4
[convolutional]
batch_normalize=1
@ -224,14 +233,15 @@ pad=1
filters=125
activation=linear
[region]
anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
bias_match=1
classes=20
coords=4
num=5
softmax=1
jitter=.2
jitter=.3
rescore=1
object_scale=5
@ -241,4 +251,4 @@ coord_scale=1
absolute=1
thresh = .6
random=0
random=1

244
cfg/yolo.2.0.cfg Normal file
View File

@ -0,0 +1,244 @@
[net]
batch=1
subdivisions=1
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
max_batches = 120000
policy=steps
steps=-1,100,80000,100000
scales=.1,10,.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[route]
layers=-9
[reorg]
stride=2
[route]
layers=-1,-3
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=425
activation=linear
[region]
anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741
bias_match=1
classes=80
coords=4
num=5
softmax=1
jitter=.2
rescore=1
object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1
absolute=1
thresh = .6
random=0

View File

@ -1,8 +1,8 @@
[net]
batch=1
subdivisions=1
width=416
height=416
batch=64
subdivisions=8
height=608
width=608
channels=3
momentum=0.9
decay=0.0005
@ -12,10 +12,11 @@ exposure = 1.5
hue=.1
learning_rate=0.001
max_batches = 120000
burn_in=1000
max_batches = 500200
policy=steps
steps=-1,100,80000,100000
scales=.1,10,.1,.1
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
@ -203,11 +204,19 @@ activation=leaky
[route]
layers=-9
[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky
[reorg]
stride=2
[route]
layers=-1,-3
layers=-1,-4
[convolutional]
batch_normalize=1
@ -224,14 +233,15 @@ pad=1
filters=425
activation=linear
[region]
anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741
anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
bias_match=1
classes=80
coords=4
num=5
softmax=1
jitter=.2
jitter=.3
rescore=1
object_scale=5
@ -241,4 +251,4 @@ coord_scale=1
absolute=1
thresh = .6
random=0
random=1

View File

@ -10,10 +10,10 @@ classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat"
def convert(size, box):
dw = 1./size[0]
dh = 1./size[1]
x = (box[0] + box[1])/2.0
y = (box[2] + box[3])/2.0
dw = 1./(size[0])
dh = 1./(size[1])
x = (box[0] + box[1])/2.0 - 1
y = (box[2] + box[3])/2.0 - 1
w = box[1] - box[0]
h = box[3] - box[2]
x = x*dw
@ -34,7 +34,7 @@ def convert_annotation(year, image_id):
for obj in root.iter('object'):
difficult = obj.find('difficult').text
cls = obj.find('name').text
if cls not in classes or int(difficult) == 1:
if cls not in classes or int(difficult)==1:
continue
cls_id = classes.index(cls)
xmlbox = obj.find('bndbox')
@ -54,3 +54,6 @@ for year, image_set in sets:
convert_annotation(year, image_id)
list_file.close()
os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")

View File

@ -1,3 +1,4 @@
#include "convolutional_layer.h"
#include "batchnorm_layer.h"
#include "blas.h"
#include <stdio.h>
@ -5,55 +6,67 @@
layer make_batchnorm_layer(int batch, int w, int h, int c)
{
fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
layer layer = {0};
layer.type = BATCHNORM;
layer.batch = batch;
layer.h = layer.out_h = h;
layer.w = layer.out_w = w;
layer.c = layer.out_c = c;
layer.output = calloc(h * w * c * batch, sizeof(float));
layer.delta = calloc(h * w * c * batch, sizeof(float));
layer.inputs = w*h*c;
layer.outputs = layer.inputs;
layer l = {0};
l.type = BATCHNORM;
l.batch = batch;
l.h = l.out_h = h;
l.w = l.out_w = w;
l.c = l.out_c = c;
l.output = calloc(h * w * c * batch, sizeof(float));
l.delta = calloc(h * w * c * batch, sizeof(float));
l.inputs = w*h*c;
l.outputs = l.inputs;
layer.scales = calloc(c, sizeof(float));
layer.scale_updates = calloc(c, sizeof(float));
l.scales = calloc(c, sizeof(float));
l.scale_updates = calloc(c, sizeof(float));
l.biases = calloc(c, sizeof(float));
l.bias_updates = calloc(c, sizeof(float));
int i;
for(i = 0; i < c; ++i){
layer.scales[i] = 1;
l.scales[i] = 1;
}
layer.mean = calloc(c, sizeof(float));
layer.variance = calloc(c, sizeof(float));
l.mean = calloc(c, sizeof(float));
l.variance = calloc(c, sizeof(float));
layer.rolling_mean = calloc(c, sizeof(float));
layer.rolling_variance = calloc(c, sizeof(float));
l.rolling_mean = calloc(c, sizeof(float));
l.rolling_variance = calloc(c, sizeof(float));
layer.forward = forward_batchnorm_layer;
layer.backward = backward_batchnorm_layer;
l.forward = forward_batchnorm_layer;
l.backward = backward_batchnorm_layer;
#ifdef GPU
layer.forward_gpu = forward_batchnorm_layer_gpu;
layer.backward_gpu = backward_batchnorm_layer_gpu;
l.forward_gpu = forward_batchnorm_layer_gpu;
l.backward_gpu = backward_batchnorm_layer_gpu;
layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
l.output_gpu = cuda_make_array(l.output, h * w * c * batch);
l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch);
layer.scales_gpu = cuda_make_array(layer.scales, c);
layer.scale_updates_gpu = cuda_make_array(layer.scale_updates, c);
l.biases_gpu = cuda_make_array(l.biases, c);
l.bias_updates_gpu = cuda_make_array(l.bias_updates, c);
layer.mean_gpu = cuda_make_array(layer.mean, c);
layer.variance_gpu = cuda_make_array(layer.variance, c);
l.scales_gpu = cuda_make_array(l.scales, c);
l.scale_updates_gpu = cuda_make_array(l.scale_updates, c);
layer.rolling_mean_gpu = cuda_make_array(layer.mean, c);
layer.rolling_variance_gpu = cuda_make_array(layer.variance, c);
l.mean_gpu = cuda_make_array(l.mean, c);
l.variance_gpu = cuda_make_array(l.variance, c);
layer.mean_delta_gpu = cuda_make_array(layer.mean, c);
layer.variance_delta_gpu = cuda_make_array(layer.variance, c);
l.rolling_mean_gpu = cuda_make_array(l.mean, c);
l.rolling_variance_gpu = cuda_make_array(l.variance, c);
layer.x_gpu = cuda_make_array(layer.output, layer.batch*layer.outputs);
layer.x_norm_gpu = cuda_make_array(layer.output, layer.batch*layer.outputs);
l.mean_delta_gpu = cuda_make_array(l.mean, c);
l.variance_delta_gpu = cuda_make_array(l.variance, c);
l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs);
l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs);
#ifdef CUDNN
cudnnCreateTensorDescriptor(&l.normTensorDesc);
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
#endif
#endif
return layer;
return l;
}
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
@ -108,7 +121,7 @@ void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_del
for(f = 0; f < filters; ++f){
for(k = 0; k < spatial; ++k){
int index = j*filters*spatial + f*spatial + k;
delta[index] = delta[index] * 1./(sqrt(variance[f]) + .00001f) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
}
}
}
@ -130,10 +143,10 @@ void forward_batchnorm_layer(layer l, network_state state)
mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);
scal_cpu(l.out_c, .9, l.rolling_mean, 1);
axpy_cpu(l.out_c, .1, l.mean, 1, l.rolling_mean, 1);
scal_cpu(l.out_c, .9, l.rolling_variance, 1);
axpy_cpu(l.out_c, .1, l.variance, 1, l.rolling_variance, 1);
scal_cpu(l.out_c, .99, l.rolling_mean, 1);
axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1);
scal_cpu(l.out_c, .99, l.rolling_variance, 1);
axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);
copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);
@ -142,10 +155,12 @@ void forward_batchnorm_layer(layer l, network_state state)
normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w);
}
scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w);
}
void backward_batchnorm_layer(const layer l, network_state state)
{
backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h);
backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates);
scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
@ -179,6 +194,28 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
l.out_h = l.out_w = 1;
}
if (state.train) {
#ifdef CUDNN
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
float one = 1;
float zero = 0;
cudnnBatchNormalizationForwardTraining(cudnn_handle(),
CUDNN_BATCHNORM_SPATIAL,
&one,
&zero,
l.dstTensorDesc,
l.x_gpu,
l.dstTensorDesc,
l.output_gpu,
l.normTensorDesc,
l.scales_gpu,
l.biases_gpu,
.01,
l.rolling_mean_gpu,
l.rolling_variance_gpu,
.00001,
l.mean_gpu,
l.variance_gpu);
#else
fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);
fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu);
@ -190,15 +227,45 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1);
scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
#endif
} else {
normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
}
scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
}
void backward_batchnorm_layer_gpu(const layer l, network_state state)
{
#ifdef CUDNN
float one = 1;
float zero = 0;
cudnnBatchNormalizationBackward(cudnn_handle(),
CUDNN_BATCHNORM_SPATIAL,
&one,
&zero,
&one,
&one,
l.dstTensorDesc,
l.x_gpu,
l.dstTensorDesc,
l.delta_gpu,
l.dstTensorDesc,
l.x_norm_gpu,
l.normTensorDesc,
l.scales_gpu,
l.scale_updates_gpu,
l.bias_updates_gpu,
.00001,
l.mean_gpu,
l.variance_gpu);
copy_ongpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1);
#else
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h);
backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu);
scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
@ -206,6 +273,7 @@ void backward_batchnorm_layer_gpu(const layer l, network_state state)
fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu);
fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu);
normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
#endif
if(l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
}
#endif

View File

@ -179,11 +179,21 @@ void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
}
else {
error[i] = 2*abs_val - 1;
delta[i] = (diff < 0) ? -1 : 1;
delta[i] = (diff < 0) ? 1 : -1;
}
}
}
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
int i;
for(i = 0; i < n; ++i){
float diff = truth[i] - pred[i];
error[i] = fabs(diff);
delta[i] = diff > 0 ? 1 : -1;
}
}
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
int i;
@ -202,21 +212,32 @@ float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
return dot;
}
void softmax(float *input, int n, float temp, float *output)
void softmax(float *input, int n, float temp, int stride, float *output)
{
int i;
float sum = 0;
float largest = -FLT_MAX;
for(i = 0; i < n; ++i){
if(input[i] > largest) largest = input[i];
if(input[i*stride] > largest) largest = input[i*stride];
}
for(i = 0; i < n; ++i){
float e = exp(input[i]/temp - largest/temp);
float e = exp(input[i*stride]/temp - largest/temp);
sum += e;
output[i] = e;
output[i*stride] = e;
}
for(i = 0; i < n; ++i){
output[i] /= sum;
output[i*stride] /= sum;
}
}
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
int g, b;
for(b = 0; b < batch; ++b){
for(g = 0; g < groups; ++g){
softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
}
}
}

View File

@ -33,9 +33,11 @@ void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_del
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
void softmax(float *input, int n, float temp, float *output);
void softmax(float *input, int n, float temp, int stride, float *output);
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
#ifdef GPU
#include "cuda.h"
@ -45,6 +47,7 @@ void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
void scal_ongpu(int N, float ALPHA, float * X, int INCX);
void add_ongpu(int N, float ALPHA, float * X, int INCX);
void supp_ongpu(int N, float ALPHA, float * X, int INCX);
void mask_ongpu(int N, float * X, float mask_num, float * mask);
void const_ongpu(int N, float ALPHA, float *X, int INCX);
@ -72,13 +75,14 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
void l2_gpu(int n, float *pred, float *truth, float *delta, float *error);
void l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc);
void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c);
void mult_add_into_gpu(int num, float *a, float *b, float *c);
void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output);
void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t);
void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out);

View File

@ -161,7 +161,7 @@ __global__ void normalize_kernel(int N, float *x, float *mean, float *variance,
if (index >= N) return;
int f = (index/spatial)%filters;
x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
x[index] = (x[index] - mean[f])/(sqrt(variance[f] + .00001f));
}
__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
@ -170,7 +170,7 @@ __global__ void normalize_delta_kernel(int N, float *x, float *mean, float *vari
if (index >= N) return;
int f = (index/spatial)%filters;
delta[index] = delta[index] * 1./(sqrt(variance[f]) + .000001f) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
}
extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
@ -192,7 +192,7 @@ __global__ void variance_delta_kernel(float *x, float *delta, float *mean, floa
variance_delta[i] += delta[index]*(x[index] - mean[i]);
}
}
variance_delta[i] *= -.5 * pow(variance[i] + .000001f, (float)(-3./2.));
variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.));
}
__global__ void accumulate_kernel(float *x, int n, int groups, float *sum)
@ -224,12 +224,14 @@ __global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch,
}
}
__syncthreads();
if(id == 0){
mean_delta[filter] = 0;
for(i = 0; i < threads; ++i){
mean_delta[filter] += local[i];
}
mean_delta[filter] *= (-1./sqrt(variance[filter] + .000001f));
mean_delta[filter] *= (-1./sqrt(variance[filter] + .00001f));
}
}
@ -252,12 +254,14 @@ __global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean,
}
}
__syncthreads();
if(id == 0){
variance_delta[filter] = 0;
for(i = 0; i < threads; ++i){
variance_delta[filter] += local[i];
}
variance_delta[filter] *= -.5 * pow(variance[filter] + .000001f, (float)(-3./2.));
variance_delta[filter] *= -.5 * pow(variance[filter] + .00001f, (float)(-3./2.));
}
}
@ -274,7 +278,7 @@ __global__ void mean_delta_kernel(float *delta, float *variance, int batch, int
mean_delta[i] += delta[index];
}
}
mean_delta[i] *= (-1./sqrt(variance[i] + .000001f));
mean_delta[i] *= (-1./sqrt(variance[i] + .00001f));
}
extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
@ -391,6 +395,12 @@ __global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
}
}
__global__ void add_kernel(int N, float ALPHA, float *X, int INCX)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if(i < N) X[i*INCX] += ALPHA;
}
__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@ -447,6 +457,8 @@ __global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial,
}
}
__syncthreads();
if(id == 0){
mean[filter] = 0;
for(i = 0; i < threads; ++i){
@ -475,6 +487,8 @@ __global__ void fast_variance_kernel(float *x, float *mean, int batch, int filt
}
}
__syncthreads();
if(id == 0){
variance[filter] = 0;
for(i = 0; i < threads; ++i){
@ -593,6 +607,12 @@ extern "C" void constrain_ongpu(int N, float ALPHA, float * X, int INCX)
}
extern "C" void add_ongpu(int N, float ALPHA, float * X, int INCX)
{
add_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
check_error(cudaPeekAtLastError());
}
extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX)
{
scal_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
@ -658,7 +678,7 @@ __global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta,
}
else {
error[i] = 2*abs_val - 1;
delta[i] = (diff < 0) ? -1 : 1;
delta[i] = (diff > 0) ? 1 : -1;
}
}
}
@ -685,6 +705,23 @@ extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *er
check_error(cudaPeekAtLastError());
}
__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if(i < n){
float diff = truth[i] - pred[i];
error[i] = abs(diff);
delta[i] = (diff > 0) ? 1 : -1;
}
}
extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
check_error(cudaPeekAtLastError());
}
__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
@ -732,36 +769,36 @@ extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c)
}
__device__ void softmax_device(int n, float *input, float temp, float *output)
__device__ void softmax_device(float *input, int n, float temp, int stride, float *output)
{
int i;
float sum = 0;
float largest = -INFINITY;
for(i = 0; i < n; ++i){
int val = input[i];
int val = input[i*stride];
largest = (val>largest) ? val : largest;
}
for(i = 0; i < n; ++i){
float e = exp(input[i]/temp - largest/temp);
float e = exp(input[i*stride]/temp - largest/temp);
sum += e;
output[i] = e;
output[i*stride] = e;
}
for(i = 0; i < n; ++i){
output[i] /= sum;
output[i*stride] /= sum;
}
}
__global__ void softmax_kernel(int n, int offset, int batch, float *input, float temp, float *output)
__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
int b = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if(b >= batch) return;
softmax_device(n, input + b*offset, temp, output + b*offset);
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (id >= batch*groups) return;
int b = id / groups;
int g = id % groups;
softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
}
extern "C" void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output)
extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
int inputs = n;
int batch = groups;
softmax_kernel<<<cuda_gridsize(batch), BLOCK>>>(inputs, offset, batch, input, temp, output);
softmax_kernel<<<cuda_gridsize(batch*groups), BLOCK>>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output);
check_error(cudaPeekAtLastError());
}

View File

@ -3,13 +3,13 @@
#include <math.h>
#include <stdlib.h>
box float_to_box(float *f)
box float_to_box(float *f, int stride)
{
box b;
b.x = f[0];
b.y = f[1];
b.w = f[2];
b.h = f[3];
b.y = f[1*stride];
b.w = f[2*stride];
b.h = f[3*stride];
return b;
}

View File

@ -9,7 +9,7 @@ typedef struct{
float dx, dy, dw, dh;
} dbox;
box float_to_box(float *f);
box float_to_box(float *f, int stride);
float box_iou(box a, box b);
float box_rmse(box a, box b);
dbox diou(box a, box b);

View File

@ -379,7 +379,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
float *pred = calloc(classes, sizeof(float));
for(j = 0; j < 10; ++j){
float *p = network_predict(net, images[j].data);
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1, 1);
axpy_cpu(classes, 1, p, 1, pred, 1);
free_image(images[j]);
}
@ -440,7 +440,7 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
//show_image(crop, "cropped");
//cvWaitKey(0);
float *pred = network_predict(net, resized.data);
if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1);
if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1, 1);
free_image(im);
free_image(resized);
@ -502,7 +502,7 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
//show_image(crop, "cropped");
//cvWaitKey(0);
float *pred = network_predict(net, crop.data);
if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1);
if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1, 1);
if(resized.data != im.data) free_image(resized);
free_image(im);
@ -563,7 +563,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
image r = resize_min(im, scales[j]);
resize_network(&net, r.w, r.h);
float *p = network_predict(net, r.data);
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1 , 1);
axpy_cpu(classes, 1, p, 1, pred, 1);
flip_image(r);
p = network_predict(net, r.data);
@ -703,7 +703,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
float *X = r.data;
time=clock();
float *predictions = network_predict(net, X);
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 0);
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 0, 1);
top_k(predictions, net.outputs, top, indexes);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
for(i = 0; i < top; ++i){
@ -1084,7 +1084,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
show_image(in, "Classifier");
float *predictions = network_predict(net, in_s.data);
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1);
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1, 1);
top_predictions(net, top, indexes);
printf("\033[2J");

View File

@ -117,26 +117,70 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
if (l.batch_normalize) {
forward_batchnorm_layer_gpu(l, state);
} else {
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
}
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
//if(l.dot > 0) dot_error_gpu(l);
if(l.binary || l.xnor) swap_binary(&l);
}
// Smoothness-regularization kernel over an NCHW tensor x of n = b*c*h*w
// elements. For each element it accumulates into delta a term pulling the
// value toward each neighbor in its size x size spatial window:
//   delta[out] += rate * (x[neighbor] - x[out])  for every in-bounds neighbor.
// One thread per output element; out-of-bounds neighbors contribute 0.
__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta)
{
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if(id >= n) return;
// decompose the linear id into NCHW coordinates:
// j = column (w), i = row (h), k = channel (c), b = batch index
int j = id % w;
id /= w;
int i = id % h;
id /= h;
int k = id % c;
id /= c;
int b = id;
// center the size x size window on (i, j); size/2. is truncated to int,
// so the offset is -(size/2) for both odd and even window sizes
int w_offset = -(size/2.);
int h_offset = -(size/2.);
int out_index = j + w*(i + h*(k + c*b));
int l, m;
for(l = 0; l < size; ++l){
for(m = 0; m < size; ++m){
int cur_h = h_offset + i + l;
int cur_w = w_offset + j + m;
// neighbor index in the same channel/batch plane
// (k + b*c is the same plane offset as k + c*b above)
int index = cur_w + w*(cur_h + h*(k + b*c));
int valid = (cur_h >= 0 && cur_h < h &&
cur_w >= 0 && cur_w < w);
delta[out_index] += valid ? rate*(x[index] - x[out_index]) : 0;
}
}
}
// Adds a smoothness penalty of strength `rate` over a size x size window to
// the layer's gradient (l.delta_gpu), based on the layer's output activations.
// NOTE(review): the element count n is computed from l.out_h/l.out_w/l.out_c,
// but the kernel is handed the input dims l.w/l.h/l.c for index decomposition.
// This only lines up when input and output dims are equal (stride 1, padding
// that preserves spatial size) -- TODO confirm that callers guarantee this.
extern "C" void smooth_layer(layer l, int size, float rate)
{
int h = l.out_h;
int w = l.out_w;
int c = l.out_c;
size_t n = h*w*c*l.batch;
smooth_kernel<<<cuda_gridsize(n), BLOCK>>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu);
check_error(cudaPeekAtLastError());
}
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
{
if(l.smooth){
smooth_layer(l, 5, l.smooth);
}
//constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
if(l.batch_normalize){
backward_batchnorm_layer_gpu(l, state);
//axpy_ongpu(l.outputs*l.batch, -state.net.decay, l.x_gpu, 1, l.delta_gpu, 1);
} else {
//axpy_ongpu(l.outputs*l.batch, -state.net.decay, l.output_gpu, 1, l.delta_gpu, 1);
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
}
float *original_input = state.input;

View File

@ -23,11 +23,11 @@ void swap_binary(convolutional_layer *l)
l->weights = l->binary_weights;
l->binary_weights = swap;
#ifdef GPU
#ifdef GPU
swap = l->weights_gpu;
l->weights_gpu = l->binary_weights_gpu;
l->binary_weights_gpu = swap;
#endif
#endif
}
void binarize_weights(float *weights, int n, int size, float *binary)
@ -80,23 +80,15 @@ int convolutional_out_width(convolutional_layer l)
image get_convolutional_image(convolutional_layer l)
{
int h,w,c;
h = convolutional_out_height(l);
w = convolutional_out_width(l);
c = l.n;
return float_to_image(w,h,c,l.output);
return float_to_image(l.out_w,l.out_h,l.out_c,l.output);
}
image get_convolutional_delta(convolutional_layer l)
{
int h,w,c;
h = convolutional_out_height(l);
w = convolutional_out_width(l);
c = l.n;
return float_to_image(w,h,c,l.delta);
return float_to_image(l.out_w,l.out_h,l.out_c,l.delta);
}
size_t get_workspace_size(layer l){
static size_t get_workspace_size(layer l){
#ifdef CUDNN
if(gpu_index >= 0){
size_t most = 0;
@ -127,7 +119,7 @@ size_t get_workspace_size(layer l){
if (s > most) most = s;
return most;
}
#endif
#endif
return (size_t)l.out_h*l.out_w*l.size*l.size*l.c*sizeof(float);
}
@ -141,6 +133,7 @@ void cudnn_convolutional_setup(layer *l)
cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
@ -198,8 +191,8 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
// float scale = 1./sqrt(size*size*c);
float scale = sqrt(2./(size*size*c));
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
int out_h = convolutional_out_height(l);
int out_w = convolutional_out_width(l);
int out_h = convolutional_out_height(l);
l.out_h = out_h;
l.out_w = out_w;
l.out_c = n;
@ -291,6 +284,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
}
#ifdef CUDNN
cudnnCreateTensorDescriptor(&l.normTensorDesc);
cudnnCreateTensorDescriptor(&l.srcTensorDesc);
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
cudnnCreateFilterDescriptor(&l.weightDesc);
@ -426,8 +420,8 @@ void backward_bias(float *bias_updates, float *delta, int batch, int n, int size
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
int out_h = convolutional_out_height(l);
int out_w = convolutional_out_width(l);
int out_h = l.out_h;
int out_w = l.out_w;
int i;
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
@ -458,8 +452,9 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
if(l.batch_normalize){
forward_batchnorm_layer(l, state);
} else {
add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
}
add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
activate_array(l.output, m*n*l.batch, l.activation);
if(l.binary || l.xnor) swap_binary(&l);
@ -470,14 +465,14 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
int i;
int m = l.n;
int n = l.size*l.size*l.c;
int k = convolutional_out_height(l)*
convolutional_out_width(l);
int k = l.out_w*l.out_h;
gradient_array(l.output, m*k*l.batch, l.activation, l.delta);
backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
if(l.batch_normalize){
backward_batchnorm_layer(l, state);
} else {
backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
}
for(i = 0; i < l.batch; ++i){

View File

@ -12,6 +12,7 @@ COST_TYPE get_cost_type(char *s)
if (strcmp(s, "sse")==0) return SSE;
if (strcmp(s, "masked")==0) return MASKED;
if (strcmp(s, "smooth")==0) return SMOOTH;
if (strcmp(s, "L1")==0) return L1;
fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s);
return SSE;
}
@ -25,6 +26,8 @@ char *get_cost_string(COST_TYPE a)
return "masked";
case SMOOTH:
return "smooth";
case L1:
return "L1";
}
return "sse";
}
@ -81,6 +84,8 @@ void forward_cost_layer(cost_layer l, network_state state)
}
if(l.cost_type == SMOOTH){
smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
}else if(l.cost_type == L1){
l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
} else {
l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
}
@ -116,12 +121,18 @@ int float_abs_compare (const void * a, const void * b)
void forward_cost_layer_gpu(cost_layer l, network_state state)
{
if (!state.truth) return;
if(l.smooth){
scal_ongpu(l.batch*l.inputs, (1-l.smooth), state.truth, 1);
add_ongpu(l.batch*l.inputs, l.smooth * 1./l.inputs, state.truth, 1);
}
if (l.cost_type == MASKED) {
mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth);
}
if(l.cost_type == SMOOTH){
smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
} else if (l.cost_type == L1){
l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
} else {
l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
}
@ -136,6 +147,10 @@ void forward_cost_layer_gpu(cost_layer l, network_state state)
supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
}
if(l.thresh){
supp_ongpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1);
}
cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}

View File

@ -157,4 +157,13 @@ void cuda_pull_array(float *x_gpu, float *x, size_t n)
check_error(status);
}
// Returns the magnitude (as computed by mag_array) of an n-element array that
// lives in GPU memory, by copying it into a temporary host buffer first.
// NOTE(review): the calloc result is not checked; on allocation failure
// cuda_pull_array would receive a NULL destination -- confirm acceptable here.
float cuda_mag_array(float *x_gpu, size_t n)
{
float *temp = calloc(n, sizeof(float));
cuda_pull_array(x_gpu, temp, n);
float m = mag_array(temp, n);
free(temp);
return m;
}
#endif

View File

@ -26,6 +26,7 @@ void cuda_free(float *x_gpu);
void cuda_random(float *x_gpu, size_t n);
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
dim3 cuda_gridsize(size_t n);
float cuda_mag_array(float *x_gpu, size_t n);
#ifdef CUDNN
cudnnHandle_t cudnn_handle();

View File

@ -24,6 +24,7 @@ extern void run_nightmare(int argc, char **argv);
extern void run_dice(int argc, char **argv);
extern void run_compare(int argc, char **argv);
extern void run_classifier(int argc, char **argv);
extern void run_regressor(int argc, char **argv);
extern void run_char_rnn(int argc, char **argv);
extern void run_vid_rnn(int argc, char **argv);
extern void run_tag(int argc, char **argv);
@ -31,6 +32,7 @@ extern void run_cifar(int argc, char **argv);
extern void run_go(int argc, char **argv);
extern void run_art(int argc, char **argv);
extern void run_super(int argc, char **argv);
extern void run_lsd(int argc, char **argv);
void average(int argc, char *argv[])
{
@ -95,7 +97,7 @@ void speed(char *cfgfile, int tics)
set_batch_network(&net, 1);
int i;
time_t start = time(0);
image im = make_image(net.w, net.h, net.c);
image im = make_image(net.w, net.h, net.c*net.batch);
for(i = 0; i < tics; ++i){
network_predict(net, im.data);
}
@ -150,12 +152,24 @@ void oneoff(char *cfgfile, char *weightfile, char *outfile)
save_weights(net, outfile);
}
void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l)
{
gpu_index = -1;
network net = parse_network_cfg(cfgfile);
if(weightfile){
load_weights_upto(&net, weightfile, 0, net.n);
load_weights_upto(&net, weightfile, l, net.n);
}
*net.seen = 0;
save_weights_upto(net, outfile, net.n);
}
void partial(char *cfgfile, char *weightfile, char *outfile, int max)
{
gpu_index = -1;
network net = parse_network_cfg(cfgfile);
if(weightfile){
load_weights_upto(&net, weightfile, max);
load_weights_upto(&net, weightfile, 0, max);
}
*net.seen = 0;
save_weights_upto(net, outfile, max);
@ -380,6 +394,8 @@ int main(int argc, char **argv)
run_voxel(argc, argv);
} else if (0 == strcmp(argv[1], "super")){
run_super(argc, argv);
} else if (0 == strcmp(argv[1], "lsd")){
run_lsd(argc, argv);
} else if (0 == strcmp(argv[1], "detector")){
run_detector(argc, argv);
} else if (0 == strcmp(argv[1], "detect")){
@ -400,6 +416,8 @@ int main(int argc, char **argv)
predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
} else if (0 == strcmp(argv[1], "classifier")){
run_classifier(argc, argv);
} else if (0 == strcmp(argv[1], "regressor")){
run_regressor(argc, argv);
} else if (0 == strcmp(argv[1], "art")){
run_art(argc, argv);
} else if (0 == strcmp(argv[1], "tag")){
@ -436,6 +454,8 @@ int main(int argc, char **argv)
speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
} else if (0 == strcmp(argv[1], "oneoff")){
oneoff(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "oneoff2")){
oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "partial")){
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "average")){

View File

@ -317,7 +317,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
h = boxes[i].h;
id = boxes[i].id;
if ((w < .005 || h < .005)) continue;
if ((w < .001 || h < .001)) continue;
truth[i*5+0] = x;
truth[i*5+1] = y;
@ -393,7 +393,7 @@ void fill_truth(char *path, char **labels, int k, float *truth)
++count;
}
}
if(count != 1) printf("Too many or too few labels: %d, %s\n", count, path);
if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path);
}
void fill_hierarchy(float *truth, int k, tree *hierarchy)
@ -428,6 +428,24 @@ void fill_hierarchy(float *truth, int k, tree *hierarchy)
}
}
// Builds an n x 1 label matrix for regression training: for each image path,
// derives the matching label path (images/JPEGImages -> targets, .jpg/.png ->
// .txt) and reads a single float target from that file into y.vals[i][0].
matrix load_regression_labels_paths(char **paths, int n)
{
matrix y = make_matrix(n, 1);
int i;
for(i = 0; i < n; ++i){
char labelpath[4096];
// NOTE(review): the last three calls pass labelpath as both source and
// destination -- assumes find_replace supports in-place use; confirm.
find_replace(paths[i], "images", "targets", labelpath);
find_replace(labelpath, "JPEGImages", "targets", labelpath);
find_replace(labelpath, ".jpg", ".txt", labelpath);
find_replace(labelpath, ".png", ".txt", labelpath);
FILE *file = fopen(labelpath, "r");
// NOTE(review): fopen result and fscanf return value are unchecked --
// a missing label file will crash on the fscanf; a malformed file leaves
// y.vals[i][0] at its calloc'd value. TODO add error handling upstream.
fscanf(file, "%f", &(y.vals[i][0]));
fclose(file);
}
return y;
}
matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy)
{
matrix y = make_matrix(n, k);
@ -673,45 +691,44 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
d.y = make_matrix(n, 5*boxes);
for(i = 0; i < n; ++i){
image orig = load_image_color(random_paths[i], 0, 0);
image sized = make_image(w, h, orig.c);
fill_image(sized, .5);
float dw = jitter * orig.w;
float dh = jitter * orig.h;
int oh = orig.h;
int ow = orig.w;
float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh));
float scale = rand_uniform(.25, 2);
int dw = (ow*jitter);
int dh = (oh*jitter);
float nw, nh;
if(new_ar < 1){
nh = scale * h;
nw = nh * new_ar;
} else {
nw = scale * w;
nh = nw / new_ar;
}
int pleft = rand_uniform(-dw, dw);
int pright = rand_uniform(-dw, dw);
int ptop = rand_uniform(-dh, dh);
int pbot = rand_uniform(-dh, dh);
float dx = rand_uniform(0, w - nw);
float dy = rand_uniform(0, h - nh);
int swidth = ow - pleft - pright;
int sheight = oh - ptop - pbot;
place_image(orig, nw, nh, dx, dy, sized);
float sx = (float)swidth / ow;
float sy = (float)sheight / oh;
int flip = rand()%2;
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
float dx = ((float)pleft/ow)/sx;
float dy = ((float)ptop /oh)/sy;
image sized = resize_image(cropped, w, h);
if(flip) flip_image(sized);
random_distort_image(sized, hue, saturation, exposure);
int flip = rand()%2;
if(flip) flip_image(sized);
d.X.vals[i] = sized.data;
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h);
free_image(orig);
free_image(cropped);
}
free(random_paths);
return d;
}
void *load_thread(void *ptr)
{
//printf("Loading data: %d\n", rand());
@ -722,6 +739,8 @@ void *load_thread(void *ptr)
if (a.type == OLD_CLASSIFICATION_DATA){
*a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == REGRESSION_DATA){
*a.d = load_data_regression(a.paths, a.n, a.m, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
} else if (a.type == CLASSIFICATION_DATA){
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
} else if (a.type == SUPER_DATA){
@ -739,6 +758,9 @@ void *load_thread(void *ptr)
} else if (a.type == IMAGE_DATA){
*(a.im) = load_image_color(a.path, 0, 0);
*(a.resized) = resize_image(*(a.im), a.w, a.h);
} else if (a.type == LETTERBOX_DATA){
*(a.im) = load_image_color(a.path, 0, 0);
*(a.resized) = letterbox_image(*(a.im), a.w, a.h);
} else if (a.type == TAG_DATA){
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
}
@ -863,6 +885,17 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
return d;
}
// Loads a regression batch: n augmented images (X) paired with one scalar
// target each (y, via load_regression_labels_paths). When m > 0, n paths are
// sampled at random from the m-entry path list and the sampled array is freed
// before returning. Augmentation parameters (min/max/size/angle/aspect/hue/
// saturation/exposure) are forwarded to load_image_augment_paths unchanged.
data load_data_regression(char **paths, int n, int m, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
// deep-owned data: X and y buffers belong to this struct
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
d.y = load_regression_labels_paths(paths, n);
if(m) free(paths);
return d;
}
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
if(m) paths = get_random_paths(paths, n, m);
@ -962,7 +995,6 @@ data load_cifar10_data(char *filename)
X.vals[i][j] = (double)bytes[j+1];
}
}
//translate_data_rows(d, -128);
scale_data_rows(d, 1./255);
//normalize_data_rows(d);
fclose(fp);
@ -1029,7 +1061,6 @@ data load_all_cifar10()
fclose(fp);
}
//normalize_data_rows(d);
//translate_data_rows(d, -128);
scale_data_rows(d, 1./255);
smooth_data(d);
return d;
@ -1113,6 +1144,19 @@ void translate_data_rows(data d, float s)
}
}
// Returns a copy of d with deep-copied X and y matrices and copied scalar
// fields (w, h, num_boxes).
// NOTE(review): c.boxes aliases d.boxes (pointer copy) while c.shallow is 0,
// so a later free of both structs could double-free the boxes array if the
// free path releases boxes for non-shallow data -- verify against free_data.
data copy_data(data d)
{
data c = {0};
c.w = d.w;
c.h = d.h;
c.shallow = 0;
c.num_boxes = d.num_boxes;
c.boxes = d.boxes;
c.X = copy_matrix(d.X);
c.y = copy_matrix(d.y);
return c;
}
void normalize_data_rows(data d)
{
int i;

View File

@ -28,7 +28,7 @@ typedef struct{
} data;
typedef enum {
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA
} data_type;
typedef struct load_args{
@ -83,6 +83,7 @@ data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
data load_data_regression(char **paths, int n, int m, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
data load_go(char *filename);
box_label *read_boxes(char *filename, int *n);
@ -106,5 +107,6 @@ data *split_data(data d, int part, int total);
data concat_data(data d1, data d2);
data concat_datas(data *d, int n);
void fill_truth(char *path, char **labels, int k, float *truth);
data copy_data(data d);
#endif

View File

@ -5,6 +5,7 @@
extern "C" {
#include "convolutional_layer.h"
#include "deconvolutional_layer.h"
#include "batchnorm_layer.h"
#include "gemm.h"
#include "blas.h"
#include "im2col.h"
@ -13,97 +14,119 @@ extern "C" {
#include "cuda.h"
}
extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
extern "C" void forward_deconvolutional_layer_gpu(layer l, network_state state)
{
int i;
int out_h = deconvolutional_out_height(layer);
int out_w = deconvolutional_out_width(layer);
int out_h = l.out_h;
int out_w = l.out_w;
int size = out_h*out_w;
int m = layer.size*layer.size*layer.n;
int n = layer.h*layer.w;
int k = layer.c;
int m = l.size*l.size*l.n;
int n = l.h*l.w;
int k = l.c;
fill_ongpu(layer.outputs*layer.batch, 0, layer.output_gpu, 1);
fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1);
for(i = 0; i < layer.batch; ++i){
float *a = layer.weights_gpu;
float *b = state.input + i*layer.c*layer.h*layer.w;
float *c = layer.col_image_gpu;
for(i = 0; i < l.batch; ++i){
float *a = l.weights_gpu;
float *b = state.input + i*l.c*l.h*l.w;
float *c = state.workspace;
gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
col2im_ongpu(c, layer.n, out_h, out_w, layer.size, layer.stride, 0, layer.output_gpu+i*layer.n*size);
col2im_ongpu(c, l.n, out_h, out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.n*size);
}
add_bias_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, size);
activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation);
if (l.batch_normalize) {
forward_batchnorm_layer_gpu(l, state);
} else {
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
}
activate_array_ongpu(l.output_gpu, l.batch*l.n*size, l.activation);
}
extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
extern "C" void backward_deconvolutional_layer_gpu(layer l, network_state state)
{
float alpha = 1./layer.batch;
int out_h = deconvolutional_out_height(layer);
int out_w = deconvolutional_out_width(layer);
int out_h = l.out_h;
int out_w = l.out_w;
int size = out_h*out_w;
int i;
gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu);
backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size);
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
if(l.batch_normalize){
backward_batchnorm_layer_gpu(l, state);
} else {
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
}
for(i = 0; i < layer.batch; ++i){
int m = layer.c;
int n = layer.size*layer.size*layer.n;
int k = layer.h*layer.w;
//if(state.delta) memset(state.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));
for(i = 0; i < l.batch; ++i){
int m = l.c;
int n = l.size*l.size*l.n;
int k = l.h*l.w;
float *a = state.input + i*m*n;
float *b = layer.col_image_gpu;
float *c = layer.weight_updates_gpu;
float *b = state.workspace;
float *c = l.weight_updates_gpu;
im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w,
layer.size, layer.stride, 0, b);
gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
im2col_ongpu(l.delta_gpu + i*l.n*size, l.n, out_h, out_w,
l.size, l.stride, l.pad, b);
gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
if(state.delta){
int m = layer.c;
int n = layer.h*layer.w;
int k = layer.size*layer.size*layer.n;
int m = l.c;
int n = l.h*l.w;
int k = l.size*l.size*l.n;
float *a = layer.weights_gpu;
float *b = layer.col_image_gpu;
float *a = l.weights_gpu;
float *b = state.workspace;
float *c = state.delta + i*n*m;
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
}
}
extern "C" void pull_deconvolutional_layer(deconvolutional_layer layer)
extern "C" void pull_deconvolutional_layer(layer l)
{
cuda_pull_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size);
cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size);
cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
cuda_pull_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size);
cuda_pull_array(l.biases_gpu, l.biases, l.n);
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size);
cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
if (l.batch_normalize){
cuda_pull_array(l.scales_gpu, l.scales, l.n);
cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
}
}
extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
extern "C" void push_deconvolutional_layer(layer l)
{
cuda_push_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size);
cuda_push_array(layer.biases_gpu, layer.biases, layer.n);
cuda_push_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size);
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size);
cuda_push_array(l.biases_gpu, l.biases, l.n);
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size);
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
if (l.batch_normalize){
cuda_push_array(l.scales_gpu, l.scales, l.n);
cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
}
}
extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
void update_deconvolutional_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay)
{
int size = layer.size*layer.size*layer.c*layer.n;
int size = l.size*l.size*l.c*l.n;
axpy_ongpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
scal_ongpu(l.n, momentum, l.bias_updates_gpu, 1);
axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1);
if(l.scales_gpu){
axpy_ongpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
scal_ongpu(l.n, momentum, l.scale_updates_gpu, 1);
}
axpy_ongpu(size, -decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
axpy_ongpu(size, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
scal_ongpu(size, momentum, layer.weight_updates_gpu, 1);
axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
axpy_ongpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
scal_ongpu(size, momentum, l.weight_updates_gpu, 1);
}

View File

@ -1,5 +1,6 @@
#include "deconvolutional_layer.h"
#include "convolutional_layer.h"
#include "batchnorm_layer.h"
#include "utils.h"
#include "im2col.h"
#include "col2im.h"
@ -8,45 +9,25 @@
#include <stdio.h>
#include <time.h>
int deconvolutional_out_height(deconvolutional_layer l)
{
int h = l.stride*(l.h - 1) + l.size;
return h;
static size_t get_workspace_size(layer l){
return (size_t)l.h*l.w*l.size*l.size*l.c*sizeof(float);
}
int deconvolutional_out_width(deconvolutional_layer l)
int deconvolutional_out_height(layer l)
{
int w = l.stride*(l.w - 1) + l.size;
return w;
return (l.h) * l.stride + l.size/2 - l.pad;
}
int deconvolutional_out_size(deconvolutional_layer l)
int deconvolutional_out_width(layer l)
{
return deconvolutional_out_height(l) * deconvolutional_out_width(l);
return (l.w) * l.stride + l.size/2 - l.pad;
}
image get_deconvolutional_image(deconvolutional_layer l)
{
int h,w,c;
h = deconvolutional_out_height(l);
w = deconvolutional_out_width(l);
c = l.n;
return float_to_image(w,h,c,l.output);
}
image get_deconvolutional_delta(deconvolutional_layer l)
{
int h,w,c;
h = deconvolutional_out_height(l);
w = deconvolutional_out_width(l);
c = l.n;
return float_to_image(w,h,c,l.delta);
}
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, int batch_normalize)
{
int i;
deconvolutional_layer l = {0};
layer l = {0};
l.type = DECONVOLUTIONAL;
l.h = h;
@ -67,71 +48,135 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
for(i = 0; i < n; ++i){
l.biases[i] = scale;
}
int out_h = deconvolutional_out_height(l);
int out_w = deconvolutional_out_width(l);
l.pad = l.size/2;
l.out_h = out_h;
l.out_w = out_w;
l.out_h = (l.h) * l.stride + l.size/2 - l.pad;
l.out_w = (l.w) * l.stride + l.size/2 - l.pad;
l.out_c = n;
l.outputs = l.out_w * l.out_h * l.out_c;
l.inputs = l.w * l.h * l.c;
l.col_image = calloc(h*w*size*size*n, sizeof(float));
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
l.output = calloc(l.batch*l.out_h * l.out_w * n, sizeof(float));
l.delta = calloc(l.batch*l.out_h * l.out_w * n, sizeof(float));
l.forward = forward_deconvolutional_layer;
l.backward = backward_deconvolutional_layer;
l.update = update_deconvolutional_layer;
#ifdef GPU
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
l.batch_normalize = batch_normalize;
l.biases_gpu = cuda_make_array(l.biases, n);
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
if(batch_normalize){
l.scales = calloc(n, sizeof(float));
l.scale_updates = calloc(n, sizeof(float));
for(i = 0; i < n; ++i){
l.scales[i] = 1;
}
l.col_image_gpu = cuda_make_array(l.col_image, h*w*size*size*n);
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
l.mean = calloc(n, sizeof(float));
l.variance = calloc(n, sizeof(float));
l.mean_delta = calloc(n, sizeof(float));
l.variance_delta = calloc(n, sizeof(float));
l.rolling_mean = calloc(n, sizeof(float));
l.rolling_variance = calloc(n, sizeof(float));
l.x = calloc(l.batch*l.outputs, sizeof(float));
l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
}
#ifdef GPU
l.forward_gpu = forward_deconvolutional_layer_gpu;
l.backward_gpu = backward_deconvolutional_layer_gpu;
l.update_gpu = update_deconvolutional_layer_gpu;
if(gpu_index >= 0){
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
l.biases_gpu = cuda_make_array(l.biases, n);
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n);
l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
if(batch_normalize){
l.mean_gpu = cuda_make_array(l.mean, n);
l.variance_gpu = cuda_make_array(l.variance, n);
l.rolling_mean_gpu = cuda_make_array(l.mean, n);
l.rolling_variance_gpu = cuda_make_array(l.variance, n);
l.mean_delta_gpu = cuda_make_array(l.mean, n);
l.variance_delta_gpu = cuda_make_array(l.variance, n);
l.scales_gpu = cuda_make_array(l.scales, n);
l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
l.x_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
}
}
#ifdef CUDNN
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
cudnnCreateTensorDescriptor(&l.normTensorDesc);
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
#endif
#endif
l.activation = activation;
l.workspace_size = get_workspace_size(l);
fprintf(stderr, "Deconvolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
return l;
}
void resize_deconvolutional_layer(deconvolutional_layer *l, int h, int w)
void resize_deconvolutional_layer(layer *l, int h, int w)
{
l->h = h;
l->w = w;
int out_h = deconvolutional_out_height(*l);
int out_w = deconvolutional_out_width(*l);
l->out_h = (l->h) * l->stride + l->size/2 - l->pad;
l->out_w = (l->w) * l->stride + l->size/2 - l->pad;
l->col_image = realloc(l->col_image,
out_h*out_w*l->size*l->size*l->c*sizeof(float));
l->output = realloc(l->output,
l->batch*out_h * out_w * l->n*sizeof(float));
l->delta = realloc(l->delta,
l->batch*out_h * out_w * l->n*sizeof(float));
#ifdef GPU
cuda_free(l->col_image_gpu);
l->outputs = l->out_h * l->out_w * l->out_c;
l->inputs = l->w * l->h * l->c;
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
if(l->batch_normalize){
l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
}
#ifdef GPU
cuda_free(l->delta_gpu);
cuda_free(l->output_gpu);
l->col_image_gpu = cuda_make_array(l->col_image, out_h*out_w*l->size*l->size*l->c);
l->delta_gpu = cuda_make_array(l->delta, l->batch*out_h*out_w*l->n);
l->output_gpu = cuda_make_array(l->output, l->batch*out_h*out_w*l->n);
l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
if(l->batch_normalize){
cuda_free(l->x_gpu);
cuda_free(l->x_norm_gpu);
l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
}
#ifdef CUDNN
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
#endif
#endif
l->workspace_size = get_workspace_size(*l);
}
void forward_deconvolutional_layer(const deconvolutional_layer l, network_state state)
void forward_deconvolutional_layer(const layer l, network_state state)
{
int i;
int out_h = deconvolutional_out_height(l);
int out_w = deconvolutional_out_width(l);
int out_h = l.out_h;
int out_w = l.out_w;
int size = out_h*out_w;
int m = l.size*l.size*l.n;
@ -143,17 +188,22 @@ void forward_deconvolutional_layer(const deconvolutional_layer l, network_state
for(i = 0; i < l.batch; ++i){
float *a = l.weights;
float *b = state.input + i*l.c*l.h*l.w;
float *c = l.col_image;
float *c = state.workspace;
gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);
col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, l.output+i*l.n*size);
}
add_bias(l.output, l.biases, l.batch, l.n, size);
if(l.batch_normalize){
forward_batchnorm_layer(l, state);
} else {
add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
}
activate_array(l.output, l.batch*l.n*size, l.activation);
}
void backward_deconvolutional_layer(deconvolutional_layer l, network_state state)
void backward_deconvolutional_layer(layer l, network_state state)
{
float alpha = 1./l.batch;
int out_h = deconvolutional_out_height(l);
@ -162,7 +212,11 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
int i;
gradient_array(l.output, size*l.n*l.batch, l.activation, l.delta);
backward_bias(l.bias_updates, l.delta, l.batch, l.n, size);
if(l.batch_normalize){
backward_batchnorm_layer(l, state);
} else {
backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
}
for(i = 0; i < l.batch; ++i){
int m = l.c;
@ -170,7 +224,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
int k = l.h*l.w;
float *a = state.input + i*m*n;
float *b = l.col_image;
float *b = state.workspace;
float *c = l.weight_updates;
im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
@ -183,7 +237,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
int k = l.size*l.size*l.n;
float *a = l.weights;
float *b = l.col_image;
float *b = state.workspace;
float *c = state.delta + i*n*m;
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
@ -191,14 +245,19 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
}
}
void update_deconvolutional_layer(deconvolutional_layer l, float learning_rate, float momentum, float decay)
void update_deconvolutional_layer(layer l, int batch, float learning_rate, float momentum, float decay)
{
int size = l.size*l.size*l.c*l.n;
axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1);
axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
scal_cpu(l.n, momentum, l.bias_updates, 1);
axpy_cpu(size, -decay, l.weights, 1, l.weight_updates, 1);
axpy_cpu(size, learning_rate, l.weight_updates, 1, l.weights, 1);
if(l.scales){
axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
scal_cpu(l.n, momentum, l.scale_updates, 1);
}
axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
scal_cpu(size, momentum, l.weight_updates, 1);
}

View File

@ -7,28 +7,19 @@
#include "layer.h"
#include "network.h"
typedef layer deconvolutional_layer;
#ifdef GPU
void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
void push_deconvolutional_layer(deconvolutional_layer layer);
void pull_deconvolutional_layer(deconvolutional_layer layer);
void forward_deconvolutional_layer_gpu(layer l, network_state state);
void backward_deconvolutional_layer_gpu(layer l, network_state state);
void update_deconvolutional_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_deconvolutional_layer(layer l);
void pull_deconvolutional_layer(layer l);
#endif
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation);
void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state);
void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state);
image get_deconvolutional_image(deconvolutional_layer layer);
image get_deconvolutional_delta(deconvolutional_layer layer);
image get_deconvolutional_filter(deconvolutional_layer layer, int i);
int deconvolutional_out_height(deconvolutional_layer layer);
int deconvolutional_out_width(deconvolutional_layer layer);
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, int batch_normalize);
void resize_deconvolutional_layer(layer *l, int h, int w);
void forward_deconvolutional_layer(const layer l, network_state state);
void update_deconvolutional_layer(layer l, int batch, float learning_rate, float momentum, float decay);
void backward_deconvolutional_layer(layer l, network_state state);
#endif

View File

@ -58,7 +58,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
int index = b*l.inputs;
for (i = 0; i < locations; ++i) {
int offset = i*l.classes;
softmax(l.output + index + offset, l.classes, 1,
softmax(l.output + index + offset, l.classes, 1, 1,
l.output + index + offset);
}
}
@ -101,13 +101,13 @@ void forward_detection_layer(const detection_layer l, network_state state)
avg_allcat += l.output[class_index+j];
}
box truth = float_to_box(state.truth + truth_index + 1 + l.classes);
box truth = float_to_box(state.truth + truth_index + 1 + l.classes, 1);
truth.x /= l.side;
truth.y /= l.side;
for(j = 0; j < l.n; ++j){
int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords;
box out = float_to_box(l.output + box_index);
box out = float_to_box(l.output + box_index, 1);
out.x /= l.side;
out.y /= l.side;
@ -146,7 +146,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
int tbox_index = truth_index + 1 + l.classes;
box out = float_to_box(l.output + box_index);
box out = float_to_box(l.output + box_index, 1);
out.x /= l.side;
out.y /= l.side;
if (l.sqrt) {

View File

@ -6,6 +6,7 @@
#include "box.h"
#include "demo.h"
#include "option_list.h"
#include "blas.h"
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
@ -103,21 +104,28 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
load_thread = load_data(args);
/*
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
if(!b.x) break;
printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
}
image im = float_to_image(448, 448, 3, train.X.vals[10]);
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
printf("%d %d %d %d\n", truth.x, truth.y, truth.w, truth.h);
draw_bbox(im, b, 8, 1,0,0);
}
save_image(im, "truth11");
*/
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
if(!b.x) break;
printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
}
*/
/*
int zz;
for(zz = 0; zz < train.X.cols; ++zz){
image im = float_to_image(net.w, net.h, 3, train.X.vals[zz]);
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[zz] + k*5);
printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
draw_bbox(im, b, 1, 1,0,0);
}
show_image(im, "truth11");
cvWaitKey(0);
save_image(im, "truth11");
}
*/
printf("Loaded: %lf seconds\n", sec(clock()-time));
@ -192,13 +200,13 @@ void print_detector_detections(FILE **fps, char *id, box *boxes, float **probs,
{
int i, j;
for(i = 0; i < total; ++i){
float xmin = boxes[i].x - boxes[i].w/2.;
float xmax = boxes[i].x + boxes[i].w/2.;
float ymin = boxes[i].y - boxes[i].h/2.;
float ymax = boxes[i].y + boxes[i].h/2.;
float xmin = boxes[i].x - boxes[i].w/2. + 1;
float xmax = boxes[i].x + boxes[i].w/2. + 1;
float ymin = boxes[i].y - boxes[i].h/2. + 1;
float ymax = boxes[i].y + boxes[i].h/2. + 1;
if (xmin < 0) xmin = 0;
if (ymin < 0) ymin = 0;
if (xmin < 1) xmin = 1;
if (ymin < 1) ymin = 1;
if (xmax > w) xmax = w;
if (ymax > h) ymax = h;
@ -231,6 +239,142 @@ void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int
}
}
void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
{
int j;
list *options = read_data_cfg(datacfg);
char *valid_images = option_find_str(options, "valid", "data/train.list");
char *name_list = option_find_str(options, "names", "data/names.list");
char *prefix = option_find_str(options, "results", "results");
char **names = get_labels(name_list);
char *mapf = option_find_str(options, "map", 0);
int *map = 0;
if (mapf) map = read_map(mapf);
network net = parse_network_cfg(cfgfile);
if(weightfile){
load_weights(&net, weightfile);
}
set_batch_network(&net, 2);
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
srand(time(0));
list *plist = get_paths(valid_images);
char **paths = (char **)list_to_array(plist);
layer l = net.layers[net.n-1];
int classes = l.classes;
char buff[1024];
char *type = option_find_str(options, "eval", "voc");
FILE *fp = 0;
FILE **fps = 0;
int coco = 0;
int imagenet = 0;
if(0==strcmp(type, "coco")){
if(!outfile) outfile = "coco_results";
snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
fp = fopen(buff, "w");
fprintf(fp, "[\n");
coco = 1;
} else if(0==strcmp(type, "imagenet")){
if(!outfile) outfile = "imagenet-detection";
snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
fp = fopen(buff, "w");
imagenet = 1;
classes = 200;
} else {
if(!outfile) outfile = "comp4_det_test_";
fps = calloc(classes, sizeof(FILE *));
for(j = 0; j < classes; ++j){
snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
fps[j] = fopen(buff, "w");
}
}
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
int m = plist->size;
int i=0;
int t;
float thresh = .005;
float nms = .45;
int nthreads = 4;
image *val = calloc(nthreads, sizeof(image));
image *val_resized = calloc(nthreads, sizeof(image));
image *buf = calloc(nthreads, sizeof(image));
image *buf_resized = calloc(nthreads, sizeof(image));
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
image input = make_image(net.w, net.h, net.c*2);
load_args args = {0};
args.w = net.w;
args.h = net.h;
//args.type = IMAGE_DATA;
args.type = LETTERBOX_DATA;
for(t = 0; t < nthreads; ++t){
args.path = paths[i+t];
args.im = &buf[t];
args.resized = &buf_resized[t];
thr[t] = load_data_in_thread(args);
}
time_t start = time(0);
for(i = nthreads; i < m+nthreads; i += nthreads){
fprintf(stderr, "%d\n", i);
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
pthread_join(thr[t], 0);
val[t] = buf[t];
val_resized[t] = buf_resized[t];
}
for(t = 0; t < nthreads && i+t < m; ++t){
args.path = paths[i+t];
args.im = &buf[t];
args.resized = &buf_resized[t];
thr[t] = load_data_in_thread(args);
}
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
char *path = paths[i+t-nthreads];
char *id = basecfg(path);
copy_cpu(net.w*net.h*net.c, val_resized[t].data, 1, input.data, 1);
flip_image(val_resized[t]);
copy_cpu(net.w*net.h*net.c, val_resized[t].data, 1, input.data + net.w*net.h*net.c, 1);
network_predict(net, input.data);
int w = val[t].w;
int h = val[t].h;
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
if (coco){
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
} else if (imagenet){
print_imagenet_detections(fp, i+t-nthreads+1, boxes, probs, l.w*l.h*l.n, classes, w, h);
} else {
print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h);
}
free(id);
free_image(val[t]);
free_image(val_resized[t]);
}
}
for(j = 0; j < classes; ++j){
if(fps) fclose(fps[j]);
}
if(coco){
fseek(fp, -2, SEEK_CUR);
fprintf(fp, "\n]\n");
fclose(fp);
}
fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
}
void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
{
int j;
@ -306,7 +450,8 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
load_args args = {0};
args.w = net.w;
args.h = net.h;
args.type = IMAGE_DATA;
//args.type = IMAGE_DATA;
args.type = LETTERBOX_DATA;
for(t = 0; t < nthreads; ++t){
args.path = paths[i+t];
@ -467,7 +612,11 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
strtok(input, "\n");
}
image im = load_image_color(input,0,0);
image sized = resize_image(im, net.w, net.h);
image sized = letterbox_image(im, net.w, net.h);
//image sized = resize_image(im, net.w, net.h);
//image sized2 = resize_max(im, net.w);
//image sized = crop_image(sized2, -((net.w - sized2.w)/2), -((net.h - sized2.h)/2), net.w, net.h);
//resize_network(&net, sized.w, sized.h);
layer l = net.layers[net.n-1];
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
@ -481,9 +630,9 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);
if (l.softmax_tree && nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
save_image(im, "predictions");
show_image(im, "predictions");
draw_detections(sized, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
save_image(sized, "predictions");
show_image(sized, "predictions");
free_image(im);
free_image(sized);
@ -541,6 +690,7 @@ void run_detector(int argc, char **argv)
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh);
else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) {
list *options = read_data_cfg(datacfg);

326
src/go.c
View File

@ -3,6 +3,8 @@
#include "parser.h"
#include "option_list.h"
#include "blas.h"
#include "data.h"
#include <unistd.h>
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
@ -10,7 +12,7 @@
int inverted = 1;
int noi = 1;
static const int nind = 5;
static const int nind = 2;
typedef struct {
char **data;
@ -88,22 +90,30 @@ void board_to_string(char *s, float *board)
}
}
void random_go_moves(moves m, float *boards, float *labels, int n)
data random_go_moves(moves m, int n)
{
data d = {0};
d.X = make_matrix(n, 19*19);
d.y = make_matrix(n, 19*19+1);
int i;
memset(labels, 0, 19*19*n*sizeof(float));
for(i = 0; i < n; ++i){
float *board = d.X.vals[i];
float *label = d.y.vals[i];
char *b = m.data[rand()%m.n];
int row = b[0];
int col = b[1];
labels[col + 19*(row + i*19)] = 1;
string_to_board(b+2, boards+i*19*19);
boards[col + 19*(row + i*19)] = 0;
if(row >= 19 || col >= 19){
label[19*19] = 1;
} else {
label[col + 19*row] = 1;
string_to_board(b+2, board);
if(board[col + 19*row]) printf("hey\n");
}
int flip = rand()%2;
int rotate = rand()%4;
image in = float_to_image(19, 19, 1, boards+i*19*19);
image out = float_to_image(19, 19, 1, labels+i*19*19);
image in = float_to_image(19, 19, 1, board);
image out = float_to_image(19, 19, 1, label);
if(flip){
flip_image(in);
flip_image(out);
@ -111,36 +121,60 @@ void random_go_moves(moves m, float *boards, float *labels, int n)
rotate_image_cw(in, rotate);
rotate_image_cw(out, rotate);
}
return d;
}
void train_go(char *cfgfile, char *weightfile)
void train_go(char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
srand(time(0));
int i;
float avg_loss = -1;
char *base = basecfg(cfgfile);
printf("%s\n", base);
network net = parse_network_cfg(cfgfile);
if(weightfile){
load_weights(&net, weightfile);
printf("%d\n", ngpus);
network *nets = calloc(ngpus, sizeof(network));
srand(time(0));
int seed = rand();
for(i = 0; i < ngpus; ++i){
srand(seed);
#ifdef GPU
cuda_set_device(gpus[i]);
#endif
nets[i] = load_network(cfgfile, weightfile, clear);
nets[i].learning_rate *= ngpus;
}
network net = nets[0];
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
char *backup_directory = "/home/pjreddie/backup/";
char buff[256];
float *board = calloc(19*19*net.batch, sizeof(float));
float *move = calloc(19*19*net.batch, sizeof(float));
moves m = load_go_moves("/home/pjreddie/backup/go.train");
//moves m = load_go_moves("games.txt");
int N = m.n;
printf("Moves: %d\n", N);
int epoch = (*net.seen)/N;
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
clock_t time=clock();
random_go_moves(m, board, move, net.batch);
float loss = train_network_datum(net, board, move) / net.batch;
data train = random_go_moves(m, net.batch*net.subdivisions*ngpus);
printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock();
float loss = 0;
#ifdef GPU
if(ngpus == 1){
loss = train_network(net, train);
} else {
loss = train_networks(nets, ngpus, train, 4);
}
#else
loss = train_network(net, train);
#endif
free_data(train);
if(avg_loss == -1) avg_loss = loss;
avg_loss = avg_loss*.95 + loss*.05;
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
@ -151,7 +185,7 @@ void train_go(char *cfgfile, char *weightfile)
save_weights(net, buff);
}
if(get_current_batch(net)%100 == 0){
if(get_current_batch(net)%1000 == 0){
char buff[256];
sprintf(buff, "%s/%s.backup",backup_directory,base);
save_weights(net, buff);
@ -204,12 +238,9 @@ int *calculate_liberties(float *board)
return lib;
}
void print_board(float *board, int swap, int *indexes)
void print_board(FILE *stream, float *board, int swap, int *indexes)
{
//FILE *stream = stdout;
FILE *stream = stderr;
int i,j,n;
fprintf(stream, "\n\n");
fprintf(stream, " ");
for(i = 0; i < 19; ++i){
fprintf(stream, "%c ", 'A' + i + 1*(i > 7 && noi));
@ -225,12 +256,12 @@ void print_board(float *board, int swap, int *indexes)
if(index == indexes[n]){
found = 1;
/*
if(n == 0) fprintf(stream, "\uff11");
else if(n == 1) fprintf(stream, "\uff12");
else if(n == 2) fprintf(stream, "\uff13");
else if(n == 3) fprintf(stream, "\uff14");
else if(n == 4) fprintf(stream, "\uff15");
*/
if(n == 0) fprintf(stream, "\uff11");
else if(n == 1) fprintf(stream, "\uff12");
else if(n == 2) fprintf(stream, "\uff13");
else if(n == 3) fprintf(stream, "\uff14");
else if(n == 4) fprintf(stream, "\uff15");
*/
if(n == 0) fprintf(stream, " 1");
else if(n == 1) fprintf(stream, " 2");
else if(n == 2) fprintf(stream, " 3");
@ -261,7 +292,7 @@ void flip_board(float *board)
void predict_move(network net, float *board, float *move, int multi)
{
float *output = network_predict(net, board);
copy_cpu(19*19, output, 1, move, 1);
copy_cpu(19*19+1, output, 1, move, 1);
int i;
if(multi){
image bim = float_to_image(19, 19, 1, board);
@ -275,12 +306,12 @@ void predict_move(network net, float *board, float *move, int multi)
if(i >= 4) flip_image(oim);
rotate_image_cw(oim, -i);
axpy_cpu(19*19, 1, output, 1, move, 1);
axpy_cpu(19*19+1, 1, output, 1, move, 1);
if(i >= 4) flip_image(bim);
rotate_image_cw(bim, -i);
}
scal_cpu(19*19, 1./8., move, 1);
scal_cpu(19*19+1, 1./8., move, 1);
}
for(i = 0; i < 19*19; ++i){
if(board[i]) move[i] = 0;
@ -350,14 +381,24 @@ int legal_go(float *b, char *ko, int p, int r, int c)
int generate_move(network net, int player, float *board, int multi, float thresh, float temp, char *ko, int print)
{
int i, j;
int empty = 1;
for(i = 0; i < 19*19; ++i){
if (board[i]) {
empty = 0;
break;
}
}
if(empty) {
return 72;
}
for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
float move[361];
float move[362];
if (player < 0) flip_board(board);
predict_move(net, board, move, multi);
if (player < 0) flip_board(board);
for(i = 0; i < 19; ++i){
for(j = 0; j < 19; ++j){
if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0;
@ -365,40 +406,43 @@ int generate_move(network net, int player, float *board, int multi, float thresh
}
int indexes[nind];
top_k(move, 19*19, nind, indexes);
top_k(move, 19*19+1, nind, indexes);
if(thresh > move[indexes[0]]) thresh = move[indexes[nind-1]];
for(i = 0; i < 19; ++i){
for(j = 0; j < 19; ++j){
if (move[i*19 + j] < thresh) move[i*19 + j] = 0;
}
for(i = 0; i < 19*19+1; ++i){
if (move[i] < thresh) move[i] = 0;
}
int max = max_index(move, 19*19);
int max = max_index(move, 19*19+1);
int row = max / 19;
int col = max % 19;
int index = sample_array(move, 19*19);
int index = sample_array(move, 19*19+1);
if(print){
top_k(move, 19*19, nind, indexes);
top_k(move, 19*19+1, nind, indexes);
for(i = 0; i < nind; ++i){
if (!move[indexes[i]]) indexes[i] = -1;
}
print_board(board, player, indexes);
print_board(stderr, board, player, indexes);
for(i = 0; i < nind; ++i){
fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]);
}
}
if (row == 19) return -1;
if(suicide_go(board, player, row, col)){
if (suicide_go(board, player, row, col)){
return -1;
}
if(suicide_go(board, player, index/19, index%19)) index = max;
if (suicide_go(board, player, index/19, index%19)){
index = max;
}
if (index == 19*19) return -1;
return index;
}
void valid_go(char *cfgfile, char *weightfile, int multi)
void valid_go(char *cfgfile, char *weightfile, int multi, char *filename)
{
srand(time(0));
char *base = basecfg(cfgfile);
@ -411,8 +455,9 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
float *board = calloc(19*19, sizeof(float));
float *move = calloc(19*19, sizeof(float));
moves m = load_go_moves("/home/pjreddie/backup/go.test");
float *move = calloc(19*19+1, sizeof(float));
// moves m = load_go_moves("/home/pjreddie/backup/go.test");
moves m = load_go_moves(filename);
int N = m.n;
int i;
@ -430,6 +475,23 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
}
}
int print_game(float *board, FILE *fp)
{
int i, j;
int count = 3;
fprintf(fp, "komi 6.5\n");
fprintf(fp, "boardsize 19\n");
fprintf(fp, "clear_board\n");
for(j = 0; j < 19; ++j){
for(i = 0; i < 19; ++i){
if(board[j*19 + i] == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j);
if(board[j*19 + i] == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j);
if(board[j*19 + i]) ++count;
}
}
return count;
}
void engine_go(char *filename, char *weightfile, int multi)
{
network net = parse_network_cfg(filename);
@ -456,8 +518,12 @@ void engine_go(char *filename, char *weightfile, int multi)
printf("=%s 2\n\n", ids);
} else if (!strcmp(buff, "name")){
printf("=%s DarkGo\n\n", ids);
} else if (!strcmp(buff, "time_settings") || !strcmp(buff, "time_left")){
char *line = fgetl(stdin);
free(line);
printf("=%s \n\n", ids);
} else if (!strcmp(buff, "version")){
printf("=%s 1.0\n\n", ids);
printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids);
} else if (!strcmp(buff, "known_command")){
char comm[256];
scanf("%s", comm);
@ -472,11 +538,14 @@ void engine_go(char *filename, char *weightfile, int multi)
!strcmp(comm, "komi") ||
!strcmp(comm, "final_status_list") ||
!strcmp(comm, "play") ||
!strcmp(comm, "genmove_white") ||
!strcmp(comm, "genmove_black") ||
!strcmp(comm, "fixed_handicap") ||
!strcmp(comm, "genmove"));
if(known) printf("=%s true\n\n", ids);
else printf("=%s false\n\n", ids);
} else if (!strcmp(buff, "list_commands")){
printf("=%s protocol_version\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove\nfinal_status_list\n\n", ids);
printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids);
} else if (!strcmp(buff, "quit")){
break;
} else if (!strcmp(buff, "boardsize")){
@ -486,8 +555,17 @@ void engine_go(char *filename, char *weightfile, int multi)
if(boardsize != 19){
printf("?%s unacceptable size\n\n", ids);
} else {
memset(board, 0, 19*19*sizeof(float));
printf("=%s \n\n", ids);
}
} else if (!strcmp(buff, "fixed_handicap")){
int handicap = 0;
scanf("%d", &handicap);
int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294};
int i;
for(i = 0; i < handicap; ++i){
board[indexes[i]] = 1;
}
} else if (!strcmp(buff, "clear_board")){
passed = 0;
memset(board, 0, 19*19*sizeof(float));
@ -496,14 +574,24 @@ void engine_go(char *filename, char *weightfile, int multi)
float komi = 0;
scanf("%f", &komi);
printf("=%s \n\n", ids);
} else if (!strcmp(buff, "play")){
} else if (!strcmp(buff, "showboard")){
printf("=%s \n", ids);
print_board(stdout, board, 1, 0);
printf("\n");
} else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){
char color[256];
scanf("%s ", color);
if(!strcmp(buff, "play"))
{
scanf("%s ", color);
} else {
scanf(" ");
color[0] = buff[0];
}
char c;
int r;
int count = scanf("%c%d", &c, &r);
int player = (color[0] == 'b' || color[0] == 'B') ? 1 : -1;
if(c == 'p' && count < 2) {
if((c == 'p' || c == 'P') && count < 2) {
passed = 1;
printf("=%s \n\n", ids);
char *line = fgetl(stdin);
@ -527,13 +615,20 @@ void engine_go(char *filename, char *weightfile, int multi)
board_to_string(one, board);
printf("=%s \n\n", ids);
print_board(board, 1, 0);
} else if (!strcmp(buff, "genmove")){
char color[256];
scanf("%s", color);
int player = (color[0] == 'b' || color[0] == 'B') ? 1 : -1;
//print_board(stderr, board, 1, 0);
} else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){
int player = 0;
if(!strcmp(buff, "genmove")){
char color[256];
scanf("%s", color);
player = (color[0] == 'b' || color[0] == 'B') ? 1 : -1;
} else if (!strcmp(buff, "genmove_black")){
player = 1;
} else {
player = -1;
}
int index = generate_move(net, player, board, multi, .1, .7, two, 1);
int index = generate_move(net, player, board, multi, .4, 1, two, 0);
if(passed || index < 0){
printf("=%s pass\n\n", ids);
passed = 0;
@ -550,7 +645,7 @@ void engine_go(char *filename, char *weightfile, int multi)
row = 19 - row;
if (col >= 8) ++col;
printf("=%s %c%d\n\n", ids, 'A' + col, row);
print_board(board, 1, 0);
//print_board(board, 1, 0);
}
} else if (!strcmp(buff, "p")){
@ -562,19 +657,10 @@ void engine_go(char *filename, char *weightfile, int multi)
char *line = fgetl(stdin);
free(line);
if(type[0] == 'd' || type[0] == 'D'){
int i;
FILE *f = fopen("game.txt", "w");
int i, j;
int count = 2;
fprintf(f, "boardsize 19\n");
fprintf(f, "clear_board\n");
for(j = 0; j < 19; ++j){
for(i = 0; i < 19; ++i){
if(board[j*19 + i] == 1) fprintf(f, "play black %c%d\n", 'A'+i+(i>=8), 19-j);
if(board[j*19 + i] == -1) fprintf(f, "play white %c%d\n", 'A'+i+(i>=8), 19-j);
if(board[j*19 + i]) ++count;
}
}
fprintf(f, "final_status_list dead\n");
int count = print_game(board, f);
fprintf(f, "%s final_status_list dead\n", ids);
fclose(f);
FILE *p = popen("./gnugo --mode gtp < game.txt", "r");
for(i = 0; i < count; ++i){
@ -608,44 +694,25 @@ void test_go(char *cfg, char *weights, int multi)
srand(time(0));
set_batch_network(&net, 1);
float *board = calloc(19*19, sizeof(float));
float *move = calloc(19*19, sizeof(float));
float *move = calloc(19*19+1, sizeof(float));
int color = 1;
while(1){
float *output = network_predict(net, board);
copy_cpu(19*19, output, 1, move, 1);
int i;
if(multi){
image bim = float_to_image(19, 19, 1, board);
for(i = 1; i < 8; ++i){
rotate_image_cw(bim, i);
if(i >= 4) flip_image(bim);
float *output = network_predict(net, board);
image oim = float_to_image(19, 19, 1, output);
if(i >= 4) flip_image(oim);
rotate_image_cw(oim, -i);
axpy_cpu(19*19, 1, output, 1, move, 1);
if(i >= 4) flip_image(bim);
rotate_image_cw(bim, -i);
}
scal_cpu(19*19, 1./8., move, 1);
}
for(i = 0; i < 19*19; ++i){
if(board[i]) move[i] = 0;
}
predict_move(net, board, move, multi);
int indexes[nind];
int row, col;
top_k(move, 19*19, nind, indexes);
print_board(board, color, indexes);
top_k(move, 19*19+1, nind, indexes);
print_board(stderr, board, color, indexes);
for(i = 0; i < nind; ++i){
int index = indexes[i];
row = index / 19;
col = index % 19;
printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100);
if(row == 19){
printf("%d: Pass, %.2f%%\n", i+1, move[index]*100);
} else {
printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100);
}
}
//if(color == 1) printf("\u25EF Enter move: ");
//else printf("\u25C9 Enter move: ");
@ -663,7 +730,9 @@ void test_go(char *cfg, char *weights, int multi)
int index = indexes[picked];
row = index / 19;
col = index % 19;
board[row*19 + col] = 1;
if(row < 19){
move_go(board, 1, row, col);
}
}
} else if (cnum){
if (c <= 'T' && c >= 'A'){
@ -671,7 +740,7 @@ void test_go(char *cfg, char *weights, int multi)
row = (inverted)?19 - row : row-1;
col = c - 'A';
if (col > 7 && noi) col -= 1;
if (num == 2) board[row*19 + col] = 1;
if (num == 2) move_go(board, 1, row, col);
} else if (c == 'p') {
// Pass
} else if(c=='b' || c == 'w'){
@ -698,19 +767,9 @@ void test_go(char *cfg, char *weights, int multi)
float score_game(float *board)
{
int i;
FILE *f = fopen("game.txt", "w");
int i, j;
int count = 3;
fprintf(f, "komi 6.5\n");
fprintf(f, "boardsize 19\n");
fprintf(f, "clear_board\n");
for(j = 0; j < 19; ++j){
for(i = 0; i < 19; ++i){
if(board[j*19 + i] == 1) fprintf(f, "play black %c%d\n", 'A'+i+(i>=8), 19-j);
if(board[j*19 + i] == -1) fprintf(f, "play white %c%d\n", 'A'+i+(i>=8), 19-j);
if(board[j*19 + i]) ++count;
}
}
int count = print_game(board, f);
fprintf(f, "final_score\n");
fclose(f);
FILE *p = popen("./gnugo --mode gtp < game.txt", "r");
@ -747,7 +806,7 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
}
}
srand(time(0));
char boards[300][93];
char boards[600][93];
int count = 0;
set_batch_network(&net, 1);
set_batch_network(&net2, 1);
@ -760,13 +819,15 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
int p2 = 0;
int total = 0;
while(1){
if (done || count >= 300){
if (done){
float score = score_game(board);
int i = (score > 0)? 0 : 1;
if((score > 0) == (total%2==0)) ++p1;
else ++p2;
++total;
fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total);
sleep(1);
/*
int i = (score > 0)? 0 : 1;
int j;
for(; i < count; i += 2){
for(j = 0; j < 93; ++j){
@ -774,6 +835,7 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
}
printf("\n");
}
*/
memset(board, 0, 19*19*sizeof(float));
player = 1;
done = 0;
@ -781,10 +843,10 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
fflush(stdout);
fflush(stderr);
}
//print_board(board, 1, 0);
print_board(stderr, board, 1, 0);
//sleep(1);
network use = ((total%2==0) == (player==1)) ? net : net2;
int index = generate_move(use, player, board, multi, .1, .7, two, 0);
int index = generate_move(use, player, board, multi, .4, 1, two, 0);
if(index < 0){
done = 1;
continue;
@ -818,13 +880,37 @@ void run_go(int argc, char **argv)
return;
}
char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
int *gpus = 0;
int gpu = 0;
int ngpus = 0;
if(gpu_list){
printf("%s\n", gpu_list);
int len = strlen(gpu_list);
ngpus = 1;
int i;
for(i = 0; i < len; ++i){
if (gpu_list[i] == ',') ++ngpus;
}
gpus = calloc(ngpus, sizeof(int));
for(i = 0; i < ngpus; ++i){
gpus[i] = atoi(gpu_list);
gpu_list = strchr(gpu_list, ',')+1;
}
} else {
gpu = gpu_index;
gpus = &gpu;
ngpus = 1;
}
int clear = find_arg(argc, argv, "-clear");
char *cfg = argv[3];
char *weights = (argc > 4) ? argv[4] : 0;
char *c2 = (argc > 5) ? argv[5] : 0;
char *w2 = (argc > 6) ? argv[6] : 0;
int multi = find_arg(argc, argv, "-multi");
if(0==strcmp(argv[2], "train")) train_go(cfg, weights);
else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi);
if(0==strcmp(argv[2], "train")) train_go(cfg, weights, gpus, ngpus, clear);
else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2);
else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi);
else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi);
else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, multi);

View File

@ -613,6 +613,21 @@ image float_to_image(int w, int h, int c, float *data)
return out;
}
/*
 * Resample image `im` to w x h and paste the result into `canvas`
 * with its top-left corner at (dx, dy).
 *
 * im     source image (any size, im.c channels)
 * w, h   size the source is resampled to inside the canvas
 * dx, dy destination offset in canvas pixels
 * canvas destination image; must have at least im.c channels
 *        (assumes canvas.c >= im.c — TODO confirm at call sites)
 *
 * Fix: keep the back-mapped sample coordinates as floats. The original
 * truncated rx/ry to int, which collapses bilinear_interpolate() to
 * nearest-neighbor sampling and causes visible aliasing.
 */
void place_image(image im, int w, int h, int dx, int dy, image canvas)
{
    int x, y, c;
    for(c = 0; c < im.c; ++c){
        for(y = 0; y < h; ++y){
            for(x = 0; x < w; ++x){
                /* Map destination pixel back into source coordinates. */
                float rx = ((float)x / w) * im.w;
                float ry = ((float)y / h) * im.h;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(canvas, x + dx, y + dy, c, val);
            }
        }
    }
}
image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect)
{
@ -652,6 +667,12 @@ image rotate_image(image im, float rad)
return rot;
}
/* Set every element of the image's data buffer (all channels) to the
 * constant value s. */
void fill_image(image m, float s)
{
    int total = m.h*m.w*m.c;
    int idx;
    for(idx = 0; idx < total; ++idx){
        m.data[idx] = s;
    }
}
void translate_image(image m, float s)
{
int i;
@ -753,6 +774,27 @@ void composite_3d(char *f1, char *f2, char *out, int delta)
#endif
}
/*
 * Resize `im` to fit inside a w x h box while preserving its aspect
 * ratio, then center it on a w x h canvas whose unused border is
 * filled with mid-gray (0.5). Returns a newly allocated image; the
 * caller owns it. The intermediate resized image is freed here.
 */
image letterbox_image(image im, int w, int h)
{
    int inner_w, inner_h;
    /* Whichever axis has the smaller scale factor is the binding
     * constraint; scale the other axis by the same integer ratio. */
    if (((float)w/im.w) < ((float)h/im.h)) {
        inner_w = w;
        inner_h = (im.h * w)/im.w;
    } else {
        inner_h = h;
        inner_w = (im.w * h)/im.h;
    }

    image scaled = resize_image(im, inner_w, inner_h);
    image boxed = make_image(w, h, im.c);
    fill_image(boxed, .5);
    /* Center the scaled image inside the padded canvas. */
    embed_image(scaled, boxed, (w - inner_w)/2, (h - inner_h)/2);
    free_image(scaled);
    return boxed;
}
image resize_max(image im, int max)
{
int w = im.w;
@ -824,6 +866,52 @@ float three_way_min(float a, float b, float c)
return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ;
}
/* In-place YUV -> RGB conversion of a 3-channel image.
 * Channel 0 is read as Y (luma), 1 as U, 2 as V, and the same
 * channels are overwritten with R, G, B. Coefficients match the
 * inverse of the BT.601-style transform used by rgb_to_yuv(). */
void yuv_to_rgb(image im)
{
    int row, col;
    assert(im.c == 3);
    for(row = 0; row < im.h; ++row){
        for(col = 0; col < im.w; ++col){
            float y = get_pixel(im, col, row, 0);
            float u = get_pixel(im, col, row, 1);
            float v = get_pixel(im, col, row, 2);
            set_pixel(im, col, row, 0, y + 1.13983*v);
            set_pixel(im, col, row, 1, y + -.39465*u + -.58060*v);
            set_pixel(im, col, row, 2, y + 2.03211*u);
        }
    }
}
/* In-place RGB -> YUV conversion of a 3-channel image.
 * Channels 0/1/2 are read as R/G/B and overwritten with Y/U/V using
 * BT.601-style coefficients (inverse of yuv_to_rgb()). */
void rgb_to_yuv(image im)
{
    int row, col;
    assert(im.c == 3);
    for(row = 0; row < im.h; ++row){
        for(col = 0; col < im.w; ++col){
            float r = get_pixel(im, col, row, 0);
            float g = get_pixel(im, col, row, 1);
            float b = get_pixel(im, col, row, 2);
            set_pixel(im, col, row, 0, .299*r + .587*g + .114*b);
            set_pixel(im, col, row, 1, -.14713*r + -.28886*g + .436*b);
            set_pixel(im, col, row, 2, .615*r + -.51499*g + -.10001*b);
        }
    }
}
// http://www.cs.rit.edu/~ncs/color/t_convert.html
void rgb_to_hsv(image im)
{
@ -903,12 +991,30 @@ void hsv_to_rgb(image im)
}
}
/* Convert a 3-channel image to grayscale in place: each pixel's
 * BT.601 luma (0.299 R + 0.587 G + 0.114 B) is written back to all
 * three channels, so the image stays 3-channel but looks gray.
 * Data layout is planar: channel c of pixel (x, y) lives at
 * data[c*h*w + y*w + x]. */
void grayscale_image_3c(image im)
{
    float scale[] = {0.299, 0.587, 0.114};
    int x, y, c;
    assert(im.c == 3);
    for(y = 0; y < im.h; ++y){
        for(x = 0; x < im.w; ++x){
            float luma = 0;
            for(c = 0; c < 3; ++c){
                luma += scale[c]*get_pixel(im, x, y, c);
            }
            int offset = im.w*y + x;
            /* Broadcast the luma into every channel plane. */
            im.data[offset] = luma;
            im.data[im.h*im.w + offset] = luma;
            im.data[2*im.h*im.w + offset] = luma;
        }
    }
}
image grayscale_image(image im)
{
assert(im.c == 3);
int i, j, k;
image gray = make_image(im.w, im.h, 1);
float scale[] = {0.587, 0.299, 0.114};
float scale[] = {0.299, 0.587, 0.114};
for(k = 0; k < im.c; ++k){
for(j = 0; j < im.h; ++j){
for(i = 0; i < im.w; ++i){

View File

@ -29,25 +29,32 @@ image crop_image(image im, int dx, int dy, int w, int h);
image random_crop_image(image im, int w, int h);
image random_augment_image(image im, float angle, float aspect, int low, int high, int size);
void random_distort_image(image im, float hue, float saturation, float exposure);
image letterbox_image(image im, int w, int h);
image resize_image(image im, int w, int h);
image resize_min(image im, int min);
image resize_max(image im, int max);
void fill_image(image m, float s);
void translate_image(image m, float s);
void normalize_image(image p);
image rotate_image(image m, float rad);
void rotate_image_cw(image im, int times);
void embed_image(image source, image dest, int dx, int dy);
void place_image(image im, int w, int h, int dx, int dy, image canvas);
void saturate_image(image im, float sat);
void exposure_image(image im, float sat);
void distort_image(image im, float hue, float sat, float val);
void saturate_exposure_image(image im, float sat, float exposure);
void rgb_to_hsv(image im);
void hsv_to_rgb(image im);
void yuv_to_rgb(image im);
void rgb_to_yuv(image im);
void rgbgr_image(image im);
void constrain_image(image im);
void composite_3d(char *f1, char *f2, char *out, int delta);
int best_3d_shift_r(image a, image b, int min, int max);
image grayscale_image(image im);
void grayscale_image_3c(image im);
image threshold_image(image im, float thresh);
image collapse_image_layers(image source, int border);

View File

@ -32,7 +32,6 @@ void free_layer(layer l)
if(l.scale_updates) free(l.scale_updates);
if(l.weights) free(l.weights);
if(l.weight_updates) free(l.weight_updates);
if(l.col_image) free(l.col_image);
if(l.delta) free(l.delta);
if(l.output) free(l.output);
if(l.squared) free(l.squared);
@ -80,7 +79,6 @@ void free_layer(layer l)
if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu);
if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu);
if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu);
if(l.col_image_gpu) cuda_free(l.col_image_gpu);
if(l.x_gpu) cuda_free(l.x_gpu);
if(l.x_norm_gpu) cuda_free(l.x_norm_gpu);
if(l.weights_gpu) cuda_free(l.weights_gpu);

View File

@ -38,7 +38,7 @@ typedef enum {
} LAYER_TYPE;
typedef enum{
SSE, MASKED, SMOOTH
SSE, MASKED, L1, SMOOTH
} COST_TYPE;
struct layer{
@ -58,6 +58,7 @@ struct layer{
int flipped;
int inputs;
int outputs;
int extra;
int truths;
int h,w,c;
int out_h, out_w, out_c;
@ -68,6 +69,7 @@ struct layer{
int side;
int stride;
int reverse;
int flatten;
int pad;
int sqrt;
int flip;
@ -76,6 +78,8 @@ struct layer{
int xnor;
int steps;
int hidden;
int truth;
float smooth;
float dot;
float angle;
float jitter;
@ -83,6 +87,7 @@ struct layer{
float exposure;
float shift;
float ratio;
float learning_rate_scale;
int softmax;
int classes;
int coords;
@ -115,6 +120,8 @@ struct layer{
int classfix;
int absolute;
int onlyforward;
int stopbackward;
int dontload;
int dontloadscales;
@ -149,7 +156,6 @@ struct layer{
float * weights;
float * weight_updates;
float * col_image;
float * delta;
float * output;
float * squared;
@ -235,8 +241,6 @@ struct layer{
float * variance_delta_gpu;
float * mean_delta_gpu;
float * col_image_gpu;
float * x_gpu;
float * x_norm_gpu;
float * weights_gpu;
@ -256,6 +260,7 @@ struct layer{
#ifdef CUDNN
cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc;
cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc;
cudnnTensorDescriptor_t normTensorDesc;
cudnnFilterDescriptor_t weightDesc;
cudnnFilterDescriptor_t dweightDesc;
cudnnConvolutionDescriptor_t convDesc;

View File

@ -57,9 +57,10 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
float scale = sqrt(2./(size*size*c));
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);
l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
l.workspace_size = out_h*out_w*size*size*c;
l.forward = forward_local_layer;
l.backward = backward_local_layer;
@ -76,7 +77,6 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
l.biases_gpu = cuda_make_array(l.biases, l.outputs);
l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs);
l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c);
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
@ -102,11 +102,11 @@ void forward_local_layer(const local_layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_cpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image);
l.size, l.stride, l.pad, state.workspace);
float *output = l.output + i*l.outputs;
for(j = 0; j < locations; ++j){
float *a = l.weights + j*l.size*l.size*l.c*l.n;
float *b = l.col_image + j;
float *b = state.workspace + j;
float *c = output + j;
int m = l.n;
@ -133,11 +133,11 @@ void backward_local_layer(local_layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_cpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image);
l.size, l.stride, l.pad, state.workspace);
for(j = 0; j < locations; ++j){
float *a = l.delta + i*l.outputs + j;
float *b = l.col_image + j;
float *b = state.workspace + j;
float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
int m = l.n;
int n = l.size*l.size*l.c;
@ -150,7 +150,7 @@ void backward_local_layer(local_layer l, network_state state)
for(j = 0; j < locations; ++j){
float *a = l.weights + j*l.size*l.size*l.c*l.n;
float *b = l.delta + i*l.outputs + j;
float *c = l.col_image + j;
float *c = state.workspace + j;
int m = l.size*l.size*l.c;
int n = 1;
@ -159,7 +159,7 @@ void backward_local_layer(local_layer l, network_state state)
gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
}
col2im_cpu(l.col_image, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
col2im_cpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
}
}
}
@ -192,11 +192,11 @@ void forward_local_layer_gpu(const local_layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_ongpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image_gpu);
l.size, l.stride, l.pad, state.workspace);
float *output = l.output_gpu + i*l.outputs;
for(j = 0; j < locations; ++j){
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
float *b = l.col_image_gpu + j;
float *b = state.workspace + j;
float *c = output + j;
int m = l.n;
@ -222,11 +222,11 @@ void backward_local_layer_gpu(local_layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_ongpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image_gpu);
l.size, l.stride, l.pad, state.workspace);
for(j = 0; j < locations; ++j){
float *a = l.delta_gpu + i*l.outputs + j;
float *b = l.col_image_gpu + j;
float *b = state.workspace + j;
float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n;
int m = l.n;
int n = l.size*l.size*l.c;
@ -239,7 +239,7 @@ void backward_local_layer_gpu(local_layer l, network_state state)
for(j = 0; j < locations; ++j){
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
float *b = l.delta_gpu + i*l.outputs + j;
float *c = l.col_image_gpu + j;
float *c = state.workspace + j;
int m = l.size*l.size*l.c;
int n = 1;
@ -248,7 +248,7 @@ void backward_local_layer_gpu(local_layer l, network_state state)
gemm_ongpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
}
col2im_ongpu(l.col_image_gpu, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
col2im_ongpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
}
}
}

924
src/lsd.c Normal file
View File

@ -0,0 +1,924 @@
#include "network.h"
#include "cost_layer.h"
#include "utils.h"
#include "parser.h"
#include "blas.h"
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#endif
void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear)
{
#ifdef GPU
//char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
//char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
char *style_images = "/home/pjreddie/zelda.txt";
char *backup_directory = "/home/pjreddie/backup/";
srand(time(0));
network fnet = load_network(fcfg, fweight, clear);
network gnet = load_network(gcfg, gweight, clear);
network anet = load_network(acfg, aweight, clear);
char *gbase = basecfg(gcfg);
char *abase = basecfg(acfg);
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet.learning_rate, gnet.momentum, gnet.decay);
int imgs = gnet.batch*gnet.subdivisions;
int i = *gnet.seen/imgs;
data train, tbuffer;
data style, sbuffer;
list *slist = get_paths(style_images);
char **spaths = (char **)list_to_array(slist);
list *tlist = get_paths(train_images);
char **tpaths = (char **)list_to_array(tlist);
load_args targs= get_base_args(gnet);
targs.paths = tpaths;
targs.n = imgs;
targs.m = tlist->size;
targs.d = &tbuffer;
targs.type = CLASSIFICATION_DATA;
targs.classes = 1;
char *ls[1] = {"zelda"};
targs.labels = ls;
load_args sargs = get_base_args(gnet);
sargs.paths = spaths;
sargs.n = imgs;
sargs.m = slist->size;
sargs.d = &sbuffer;
sargs.type = CLASSIFICATION_DATA;
sargs.classes = 1;
sargs.labels = ls;
pthread_t tload_thread = load_data_in_thread(targs);
pthread_t sload_thread = load_data_in_thread(sargs);
clock_t time;
float aloss_avg = -1;
float floss_avg = -1;
network_state fstate = {0};
fstate.index = 0;
fstate.net = fnet;
int x_size = get_network_input_size(fnet)*fnet.batch;
int y_size = get_network_output_size(fnet)*fnet.batch;
fstate.input = cuda_make_array(0, x_size);
fstate.truth = cuda_make_array(0, y_size);
fstate.delta = cuda_make_array(0, x_size);
fstate.train = 1;
float *X = calloc(x_size, sizeof(float));
float *y = calloc(y_size, sizeof(float));
float *ones = cuda_make_array(0, anet.batch);
float *zeros = cuda_make_array(0, anet.batch);
fill_ongpu(anet.batch, .99, ones, 1);
fill_ongpu(anet.batch, .01, zeros, 1);
network_state astate = {0};
astate.index = 0;
astate.net = anet;
int ax_size = get_network_input_size(anet)*anet.batch;
int ay_size = get_network_output_size(anet)*anet.batch;
astate.input = 0;
astate.truth = ones;
astate.delta = cuda_make_array(0, ax_size);
astate.train = 1;
network_state gstate = {0};
gstate.index = 0;
gstate.net = gnet;
int gx_size = get_network_input_size(gnet)*gnet.batch;
int gy_size = get_network_output_size(gnet)*gnet.batch;
gstate.input = cuda_make_array(0, gx_size);
gstate.truth = 0;
gstate.delta = 0;
gstate.train = 1;
while (get_current_batch(gnet) < gnet.max_batches) {
i += 1;
time=clock();
pthread_join(tload_thread, 0);
pthread_join(sload_thread, 0);
train = tbuffer;
style = sbuffer;
tload_thread = load_data_in_thread(targs);
sload_thread = load_data_in_thread(sargs);
printf("Loaded: %lf seconds\n", sec(clock()-time));
data generated = copy_data(train);
time=clock();
int j, k;
float floss = 0;
for(j = 0; j < fnet.subdivisions; ++j){
layer imlayer = gnet.layers[gnet.n - 1];
get_next_batch(train, fnet.batch, j*fnet.batch, X, y);
cuda_push_array(fstate.input, X, x_size);
cuda_push_array(gstate.input, X, gx_size);
*gnet.seen += gnet.batch;
forward_network_gpu(fnet, fstate);
float *feats = fnet.layers[fnet.n - 2].output_gpu;
copy_ongpu(y_size, feats, 1, fstate.truth, 1);
forward_network_gpu(gnet, gstate);
float *gen = gnet.layers[gnet.n-1].output_gpu;
copy_ongpu(x_size, gen, 1, fstate.input, 1);
fill_ongpu(x_size, 0, fstate.delta, 1);
forward_network_gpu(fnet, fstate);
backward_network_gpu(fnet, fstate);
//HERE
astate.input = gen;
fill_ongpu(ax_size, 0, astate.delta, 1);
forward_network_gpu(anet, astate);
backward_network_gpu(anet, astate);
float *delta = imlayer.delta_gpu;
fill_ongpu(x_size, 0, delta, 1);
scal_ongpu(x_size, 100, astate.delta, 1);
scal_ongpu(x_size, .00001, fstate.delta, 1);
axpy_ongpu(x_size, 1, fstate.delta, 1, delta, 1);
axpy_ongpu(x_size, 1, astate.delta, 1, delta, 1);
//fill_ongpu(x_size, 0, delta, 1);
//cuda_push_array(delta, X, x_size);
//axpy_ongpu(x_size, -1, imlayer.output_gpu, 1, delta, 1);
//printf("pix error: %f\n", cuda_mag_array(delta, x_size));
printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size));
printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size));
//axpy_ongpu(x_size, 1, astate.delta, 1, delta, 1);
backward_network_gpu(gnet, gstate);
floss += get_network_cost(fnet) /(fnet.subdivisions*fnet.batch);
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
for(k = 0; k < gnet.batch; ++k){
int index = j*gnet.batch + k;
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1);
generated.y.vals[index][0] = .01;
}
}
/*
image sim = float_to_image(anet.w, anet.h, anet.c, style.X.vals[j]);
show_image(sim, "style");
cvWaitKey(0);
*/
harmless_update_network_gpu(anet);
data merge = concat_data(style, generated);
randomize_data(merge);
float aloss = train_network(anet, merge);
update_network_gpu(gnet);
free_data(merge);
free_data(train);
free_data(generated);
free_data(style);
if (aloss_avg < 0) aloss_avg = aloss;
if (floss_avg < 0) floss_avg = floss;
aloss_avg = aloss_avg*.9 + aloss*.1;
floss_avg = floss_avg*.9 + floss*.1;
printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs);
if(i%1000==0){
char buff[256];
sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i);
save_weights(gnet, buff);
sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
save_weights(anet, buff);
}
if(i%100==0){
char buff[256];
sprintf(buff, "%s/%s.backup", backup_directory, gbase);
save_weights(gnet, buff);
sprintf(buff, "%s/%s.backup", backup_directory, abase);
save_weights(anet, buff);
}
}
#endif
}
/*
 * Adversarial image-to-image training (GPU-only; no-op without GPU):
 * the generator `net` maps grayscale inputs back to color images and
 * is trained with its own reconstruction cost plus a gradient from the
 * adversary `anet`, which learns to tell real color images from
 * generated ones.
 *
 * cfg/weight   — generator config and optional starting weights
 * acfg/aweight — adversary config and optional starting weights
 * clear        — reset the seen-image counters on load
 *
 * NOTE(review): training list and backup directory are hard-coded to
 * the author's machine; host buffers and GPU arrays allocated here are
 * never freed. This function is nearly identical to train_colorizer()
 * below (different labels and real/fake target values).
 */
void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear)
{
#ifdef GPU
    //char *train_images = "/home/pjreddie/data/coco/train1.txt";
    //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
    char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
    char *backup_directory = "/home/pjreddie/backup/";
    srand(time(0));
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
    network net = load_network(cfg, weight, clear);
    network anet = load_network(acfg, aweight, clear);

    int i, j, k;
    /* Find the first layer that emits a 3-channel image — treated as the
     * generator's image output. NOTE(review): if none exists, imlayer
     * stays zeroed and the code below would misbehave. */
    layer imlayer = {0};

    for (i = 0; i < net.n; ++i) {
        if (net.layers[i].out_c == 3) {
            imlayer = net.layers[i];
            break;
        }
    }

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    int imgs = net.batch*net.subdivisions;
    /* Resume the iteration counter from the number of images already seen. */
    i = *net.seen/imgs;
    data train, buffer;

    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    /* Loader configuration: standard classification-style augmentation
     * driven by the generator's own hyperparameters. */
    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;

    args.min = net.min_crop;
    args.max = net.max_crop;
    args.angle = net.angle;
    args.aspect = net.aspect;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;
    args.size = net.w;
    args.type = CLASSIFICATION_DATA;
    args.classes = 1;
    /* Single dummy label; the real supervision is the image itself. */
    char *ls[1] = {"coco"};
    args.labels = ls;

    /* Double-buffered background loading. */
    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

    /* Generator state driven by hand: input = grayscale, truth = color. */
    network_state gstate = {0};
    gstate.index = 0;
    gstate.net = net;
    int x_size = get_network_input_size(net)*net.batch;
    /* Truth is a full image, same size as the input. */
    int y_size = x_size;
    gstate.input = cuda_make_array(0, x_size);
    gstate.truth = cuda_make_array(0, y_size);
    gstate.delta = 0;
    gstate.train = 1;
    float *pixs = calloc(x_size, sizeof(float));
    float *graypixs = calloc(x_size, sizeof(float));
    /* Scratch label buffer required by get_next_batch(); unused content. */
    float *y = calloc(y_size, sizeof(float));

    /* Adversary state: input/delta are wired to the generator's image
     * layer inside the loop. */
    network_state astate = {0};
    astate.index = 0;
    astate.net = anet;
    int ay_size = get_network_output_size(anet)*anet.batch;
    astate.input = 0;
    astate.truth = 0;
    astate.delta = 0;
    astate.train = 1;

    /* imerror accumulates the adversarial gradient w.r.t. the generated
     * image; ones_gpu is the soft "real" target (.9). */
    float *imerror = cuda_make_array(0, imlayer.outputs);
    float *ones_gpu = cuda_make_array(0, ay_size);
    fill_ongpu(ay_size, .9, ones_gpu, 1);

    /* Exponential moving averages of the losses (-1 = uninitialized). */
    float aloss_avg = -1;
    float gloss_avg = -1;

    //data generated = copy_data(train);

    while (get_current_batch(net) < net.max_batches) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        /* Build the grayscale copy of the batch and mark the color
         * originals as ~real (.9) for the adversary. */
        data gray = copy_data(train);
        for(j = 0; j < imgs; ++j){
            image gim = float_to_image(net.w, net.h, net.c, gray.X.vals[j]);
            grayscale_image_3c(gim);
            train.y.vals[j][0] = .9;

            /* NOTE(review): yim is only used by the commented-out
             * rgb_to_yuv experiment below. */
            image yim = float_to_image(net.w, net.h, net.c, train.X.vals[j]);
            //rgb_to_yuv(yim);
        }
        time=clock();
        float gloss = 0;

        for(j = 0; j < net.subdivisions; ++j){
            get_next_batch(train, net.batch, j*net.batch, pixs, y);
            get_next_batch(gray, net.batch, j*net.batch, graypixs, y);
            /* Input = grayscale images, truth = original color images. */
            cuda_push_array(gstate.input, graypixs, x_size);
            cuda_push_array(gstate.truth, pixs, x_size);
            /*
               image origi = float_to_image(net.w, net.h, 3, pixs);
               image grayi = float_to_image(net.w, net.h, 3, graypixs);
               show_image(grayi, "gray");
               show_image(origi, "orig");
               cvWaitKey(0);
             */
            *net.seen += net.batch;
            forward_network_gpu(net, gstate);

            /* Run the adversary on the generated image with "real" as
             * truth; its input gradient lands in imerror. */
            fill_ongpu(imlayer.outputs, 0, imerror, 1);
            astate.input = imlayer.output_gpu;
            astate.delta = imerror;
            astate.truth = ones_gpu;
            forward_network_gpu(anet, astate);
            backward_network_gpu(anet, astate);

            /* Down-weight the generator's own reconstruction gradient... */
            scal_ongpu(imlayer.outputs, .1, net.layers[net.n-1].delta_gpu, 1);

            backward_network_gpu(net, gstate);

            /* ...and up-weight the adversarial gradient before injecting
             * it into the image layer's delta. */
            scal_ongpu(imlayer.outputs, 100, imerror, 1);

            printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs));
            printf("features %f\n", cuda_mag_array(net.layers[net.n-1].delta_gpu, imlayer.outputs));

            axpy_ongpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1);

            gloss += get_network_cost(net) /(net.subdivisions*net.batch);

            /* Replace the gray batch with the generated images, labeled
             * as ~fake (.1), to train the adversary below. */
            cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
            for(k = 0; k < net.batch; ++k){
                int index = j*net.batch + k;
                copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
                gray.y.vals[index][0] = .1;
            }
        }
        /* Clear gradients accumulated in anet during the manual passes. */
        harmless_update_network_gpu(anet);

        /* Train the adversary on real (train) + generated (gray) images. */
        data merge = concat_data(train, gray);
        randomize_data(merge);
        float aloss = train_network(anet, merge);

        update_network_gpu(net);
        update_network_gpu(anet);
        free_data(merge);
        free_data(train);
        free_data(gray);
        if (aloss_avg < 0) aloss_avg = aloss;
        aloss_avg = aloss_avg*.9 + aloss*.1;
        gloss_avg = gloss_avg*.9 + gloss*.1;

        printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs);
        /* Periodic snapshots: numbered every 1000 iters, rolling backup
         * every 100. */
        if(i%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
            sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
            save_weights(anet, buff);
        }
        if(i%100==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
            sprintf(buff, "%s/%s.backup", backup_directory, abase);
            save_weights(anet, buff);
        }
    }
    /* Final generator weights (the adversary gets no final save). */
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
#endif
}
/*
 * Adversarial colorization training (GPU-only; no-op without GPU):
 * the generator `net` is fed grayscale versions of training images
 * with the color originals as truth, and additionally receives a
 * gradient from the adversary `anet` that pushes its outputs toward
 * looking like real color photos.
 *
 * cfg/weight   — generator config and optional starting weights
 * acfg/aweight — adversary config and optional starting weights
 * clear        — reset the seen-image counters on load
 *
 * NOTE(review): this is a near-duplicate of train_pix2pix() above —
 * only the label string and the real/fake target values differ
 * (.99/.01 here vs .9/.1 there). Paths are hard-coded; host and GPU
 * allocations are never freed.
 */
void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear)
{
#ifdef GPU
    //char *train_images = "/home/pjreddie/data/coco/train1.txt";
    //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
    char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
    char *backup_directory = "/home/pjreddie/backup/";
    srand(time(0));
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
    network net = load_network(cfg, weight, clear);
    network anet = load_network(acfg, aweight, clear);

    int i, j, k;
    /* Find the first layer with a 3-channel output — the generator's
     * image layer. NOTE(review): stays zeroed if none exists. */
    layer imlayer = {0};

    for (i = 0; i < net.n; ++i) {
        if (net.layers[i].out_c == 3) {
            imlayer = net.layers[i];
            break;
        }
    }

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    int imgs = net.batch*net.subdivisions;
    /* Resume the iteration counter from the number of images already seen. */
    i = *net.seen/imgs;
    data train, buffer;

    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    /* Loader configuration driven by the generator's hyperparameters. */
    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;

    args.min = net.min_crop;
    args.max = net.max_crop;
    args.angle = net.angle;
    args.aspect = net.aspect;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;
    args.size = net.w;
    args.type = CLASSIFICATION_DATA;
    args.classes = 1;
    /* Single dummy label; supervision is the color image itself. */
    char *ls[1] = {"imagenet"};
    args.labels = ls;

    /* Double-buffered background loading. */
    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

    /* Generator state driven by hand: input = grayscale, truth = color. */
    network_state gstate = {0};
    gstate.index = 0;
    gstate.net = net;
    int x_size = get_network_input_size(net)*net.batch;
    /* Truth is a full image, same size as the input. */
    int y_size = x_size;
    gstate.input = cuda_make_array(0, x_size);
    gstate.truth = cuda_make_array(0, y_size);
    gstate.delta = 0;
    gstate.train = 1;
    float *pixs = calloc(x_size, sizeof(float));
    float *graypixs = calloc(x_size, sizeof(float));
    /* Scratch label buffer required by get_next_batch(); unused content. */
    float *y = calloc(y_size, sizeof(float));

    /* Adversary state: input/delta wired to the image layer in the loop. */
    network_state astate = {0};
    astate.index = 0;
    astate.net = anet;
    int ay_size = get_network_output_size(anet)*anet.batch;
    astate.input = 0;
    astate.truth = 0;
    astate.delta = 0;
    astate.train = 1;

    /* imerror accumulates the adversarial gradient w.r.t. the generated
     * image; ones_gpu is the soft "real" target (.99). */
    float *imerror = cuda_make_array(0, imlayer.outputs);
    float *ones_gpu = cuda_make_array(0, ay_size);
    fill_ongpu(ay_size, .99, ones_gpu, 1);

    /* Exponential moving averages of the losses (-1 = uninitialized). */
    float aloss_avg = -1;
    float gloss_avg = -1;

    //data generated = copy_data(train);

    while (get_current_batch(net) < net.max_batches) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        /* Build the grayscale copy of the batch and mark the color
         * originals as ~real (.99) for the adversary. */
        data gray = copy_data(train);
        for(j = 0; j < imgs; ++j){
            image gim = float_to_image(net.w, net.h, net.c, gray.X.vals[j]);
            grayscale_image_3c(gim);
            train.y.vals[j][0] = .99;

            /* NOTE(review): yim is only used by the commented-out
             * rgb_to_yuv experiment below. */
            image yim = float_to_image(net.w, net.h, net.c, train.X.vals[j]);
            //rgb_to_yuv(yim);
        }
        time=clock();
        float gloss = 0;

        for(j = 0; j < net.subdivisions; ++j){
            get_next_batch(train, net.batch, j*net.batch, pixs, y);
            get_next_batch(gray, net.batch, j*net.batch, graypixs, y);
            /* Input = grayscale images, truth = original color images. */
            cuda_push_array(gstate.input, graypixs, x_size);
            cuda_push_array(gstate.truth, pixs, x_size);
            /*
               image origi = float_to_image(net.w, net.h, 3, pixs);
               image grayi = float_to_image(net.w, net.h, 3, graypixs);
               show_image(grayi, "gray");
               show_image(origi, "orig");
               cvWaitKey(0);
             */
            *net.seen += net.batch;
            forward_network_gpu(net, gstate);

            /* Run the adversary on the generated image with "real" as
             * truth; its input gradient lands in imerror. */
            fill_ongpu(imlayer.outputs, 0, imerror, 1);
            astate.input = imlayer.output_gpu;
            astate.delta = imerror;
            astate.truth = ones_gpu;
            forward_network_gpu(anet, astate);
            backward_network_gpu(anet, astate);

            /* Down-weight the generator's own reconstruction gradient... */
            scal_ongpu(imlayer.outputs, .1, net.layers[net.n-1].delta_gpu, 1);

            backward_network_gpu(net, gstate);

            /* ...and up-weight the adversarial gradient before injecting
             * it into the image layer's delta. */
            scal_ongpu(imlayer.outputs, 100, imerror, 1);

            printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs));
            printf("features %f\n", cuda_mag_array(net.layers[net.n-1].delta_gpu, imlayer.outputs));

            axpy_ongpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1);

            gloss += get_network_cost(net) /(net.subdivisions*net.batch);

            /* Replace the gray batch with the generated images, labeled
             * as ~fake (.01), to train the adversary below. */
            cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
            for(k = 0; k < net.batch; ++k){
                int index = j*net.batch + k;
                copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
                gray.y.vals[index][0] = .01;
            }
        }
        /* Clear gradients accumulated in anet during the manual passes. */
        harmless_update_network_gpu(anet);

        /* Train the adversary on real (train) + generated (gray) images. */
        data merge = concat_data(train, gray);
        randomize_data(merge);
        float aloss = train_network(anet, merge);

        update_network_gpu(net);
        update_network_gpu(anet);
        free_data(merge);
        free_data(train);
        free_data(gray);
        if (aloss_avg < 0) aloss_avg = aloss;
        aloss_avg = aloss_avg*.9 + aloss*.1;
        gloss_avg = gloss_avg*.9 + gloss*.1;

        printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs);
        /* Periodic snapshots: numbered every 1000 iters, rolling backup
         * every 100. */
        if(i%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
            sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
            save_weights(anet, buff);
        }
        if(i%100==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
            sprintf(buff, "%s/%s.backup", backup_directory, abase);
            save_weights(anet, buff);
        }
    }
    /* Final generator weights (the adversary gets no final save). */
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
#endif
}
// Adversarial (GAN-style) LSD training: a generator network (cfgfile) is
// trained against an adversary network (acfgfile).  Each outer iteration
// streams a chunk of COCO images, runs the generator, scores its image
// output with the adversary, backpropagates the adversary's "realness"
// error into the generator, then trains the adversary on a shuffled mix of
// real and generated images.
// GPU-only: the whole body is compiled out without -DGPU.
void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear)
{
#ifdef GPU
// Hard-coded training list and checkpoint directory (author's paths).
char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
char *backup_directory = "/home/pjreddie/backup/";
srand(time(0));
char *base = basecfg(cfgfile);
printf("%s\n", base);
// Generator network.
network net = parse_network_cfg(cfgfile);
if(weightfile){
load_weights(&net, weightfile);
}
if(clear) *net.seen = 0;
// Adversary network.
char *abase = basecfg(acfgfile);
network anet = parse_network_cfg(acfgfile);
if(aweightfile){
load_weights(&anet, aweightfile);
}
if(clear) *anet.seen = 0;
int i, j, k;
// Find the generator's image-producing layer: the first layer with 3
// output channels.  NOTE(review): if no such layer exists, imlayer stays
// zero-initialized and the cuda/copy calls below operate on size 0 /
// NULL pointers -- confirm every generator cfg has an out_c==3 layer.
layer imlayer = {0};
for (i = 0; i < net.n; ++i) {
if (net.layers[i].out_c == 3) {
imlayer = net.layers[i];
break;
}
}
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
// Images consumed per outer iteration (one full batch across subdivisions).
int imgs = net.batch*net.subdivisions;
i = *net.seen/imgs;
data train, buffer;
list *plist = get_paths(train_images);
//int N = plist->size;
char **paths = (char **)list_to_array(plist);
// Asynchronous loader arguments: classification-style loading with a
// single dummy "coco" label.
load_args args = {0};
args.w = net.w;
args.h = net.h;
args.paths = paths;
args.n = imgs;
args.m = plist->size;
args.d = &buffer;
args.min = net.min_crop;
args.max = net.max_crop;
args.angle = net.angle;
args.aspect = net.aspect;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
args.size = net.w;
args.type = CLASSIFICATION_DATA;
args.classes = 1;
char *ls[1] = {"coco"};
args.labels = ls;
pthread_t load_thread = load_data_in_thread(args);
clock_t time;
// Generator forward/backward state; input lives on the GPU.
network_state gstate = {0};
gstate.index = 0;
gstate.net = net;
int x_size = get_network_input_size(net)*net.batch;
int y_size = 1*net.batch;
gstate.input = cuda_make_array(0, x_size);
gstate.truth = 0;
gstate.delta = 0;
gstate.train = 1;
// Host-side staging buffers for get_next_batch.
// NOTE(review): X and y are never freed before return -- minor leak.
float *X = calloc(x_size, sizeof(float));
float *y = calloc(y_size, sizeof(float));
// Adversary state: its input/delta are wired per-subdivision to the
// generator's image layer below.
network_state astate = {0};
astate.index = 0;
astate.net = anet;
int ay_size = get_network_output_size(anet)*anet.batch;
astate.input = 0;
astate.truth = 0;
astate.delta = 0;
astate.train = 1;
// imerror accumulates d(adversary)/d(generated image); ones_gpu is the
// "this is real" target fed to the adversary while training the generator.
float *imerror = cuda_make_array(0, imlayer.outputs);
float *ones_gpu = cuda_make_array(0, ay_size);
fill_ongpu(ay_size, 1, ones_gpu, 1);
// Running exponential averages of the two losses (-1 = uninitialized).
float aloss_avg = -1;
float gloss_avg = -1;
//data generated = copy_data(train);
while (get_current_batch(net) < net.max_batches) {
i += 1;
time=clock();
pthread_join(load_thread, 0);
train = buffer;
load_thread = load_data_in_thread(args);
printf("Loaded: %lf seconds\n", sec(clock()-time));
// "generated" starts as a copy of the real chunk; its X rows are
// overwritten below with generator output and labeled 0 (fake).
data generated = copy_data(train);
time=clock();
float gloss = 0;
for(j = 0; j < net.subdivisions; ++j){
get_next_batch(train, net.batch, j*net.batch, X, y);
cuda_push_array(gstate.input, X, x_size);
*net.seen += net.batch;
// Generator forward pass produces images at imlayer.
forward_network_gpu(net, gstate);
fill_ongpu(imlayer.outputs, 0, imerror, 1);
// Feed generated images to the adversary with target "real" so its
// backward pass writes a realness gradient into imerror.
astate.input = imlayer.output_gpu;
astate.delta = imerror;
astate.truth = ones_gpu;
forward_network_gpu(anet, astate);
backward_network_gpu(anet, astate);
// Scale factor 1 here is a no-op; kept as a tuning knob (the sibling
// trainer above uses 100).
scal_ongpu(imlayer.outputs, 1, imerror, 1);
// Inject the adversary's gradient into the generator's image layer,
// then backprop the generator (its own cost delta plus imerror).
axpy_ongpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1);
backward_network_gpu(net, gstate);
printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs));
printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs));
gloss += get_network_cost(net) /(net.subdivisions*net.batch);
// NOTE(review): pulls x_size floats, but imlayer.output holds
// imlayer.outputs*net.batch -- these agree only when the image layer
// is the same size as the network input; confirm for all cfgs used.
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
for(k = 0; k < net.batch; ++k){
int index = j*net.batch + k;
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1);
// Label generated samples as fake (0) for the adversary.
generated.y.vals[index][0] = 0;
}
}
// Flush the adversary's pending gradient state without changing its
// weights (zero-rate update), then train it on real+fake data.
harmless_update_network_gpu(anet);
data merge = concat_data(train, generated);
randomize_data(merge);
float aloss = train_network(anet, merge);
update_network_gpu(net);
update_network_gpu(anet);
free_data(merge);
free_data(train);
free_data(generated);
if (aloss_avg < 0) aloss_avg = aloss;
aloss_avg = aloss_avg*.9 + aloss*.1;
gloss_avg = gloss_avg*.9 + gloss*.1;
printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs);
// Numbered checkpoints every 1000 iterations; rolling .backup every 100.
if(i%1000==0){
char buff[256];
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
save_weights(anet, buff);
}
if(i%100==0){
char buff[256];
sprintf(buff, "%s/%s.backup", backup_directory, base);
save_weights(net, buff);
sprintf(buff, "%s/%s.backup", backup_directory, abase);
save_weights(anet, buff);
}
}
char buff[256];
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
save_weights(net, buff);
#endif
}
// Plain (non-adversarial) LSD training: stream COCO image chunks through
// the asynchronous loader and run one train_network call per chunk,
// checkpointing periodically.
void train_lsd(char *cfgfile, char *weightfile, int clear)
{
    char *image_list = "/home/pjreddie/data/coco/trainvalno5k.txt";
    char *backup_dir = "/home/pjreddie/backup/";
    srand(time(0));
    char *cfg_base = basecfg(cfgfile);
    printf("%s\n", cfg_base);

    network net = parse_network_cfg(cfgfile);
    if(weightfile) load_weights(&net, weightfile);
    if(clear) *net.seen = 0;
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);

    // Images consumed per iteration and the iteration counter resumed
    // from the number of images the network has already seen.
    int per_iter = net.batch*net.subdivisions;
    int iter = *net.seen/per_iter;
    float avg_loss = -1;

    list *plist = get_paths(image_list);
    char **paths = (char **)list_to_array(plist);

    // Loader arguments: classification-style loading with one dummy
    // "coco" label.
    load_args args = {0};
    args.paths = paths;
    args.n = per_iter;
    args.m = plist->size;
    args.w = net.w;
    args.h = net.h;
    args.size = net.w;
    args.min = net.min_crop;
    args.max = net.max_crop;
    args.angle = net.angle;
    args.aspect = net.aspect;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;
    args.type = CLASSIFICATION_DATA;
    args.classes = 1;
    char *labels[1] = {"coco"};
    args.labels = labels;

    data train, buffer;
    args.d = &buffer;
    pthread_t loader = load_data_in_thread(args);
    clock_t tick;
    while(get_current_batch(net) < net.max_batches){
        ++iter;
        // Swap in the chunk loaded in the background, restart the loader.
        tick = clock();
        pthread_join(loader, 0);
        train = buffer;
        loader = load_data_in_thread(args);
        printf("Loaded: %lf seconds\n", sec(clock()-tick));

        tick = clock();
        float loss = train_network(net, train);
        // Exponential moving average; seeded with the first loss.
        avg_loss = (avg_loss < 0) ? loss : avg_loss*.9 + loss*.1;
        printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", iter, loss, avg_loss, get_current_rate(net), sec(clock()-tick), iter*per_iter);

        // Numbered checkpoint every 1000 iterations, rolling backup every 100.
        if(iter%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_dir, cfg_base, iter);
            save_weights(net, buff);
        }
        if(iter%100==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_dir, cfg_base);
            save_weights(net, buff);
        }
        free_data(train);
    }
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_dir, cfg_base);
    save_weights(net, buff);
}
// Interactive/single-shot LSD inference: load an image (from the argument
// or stdin), run the network, and display/save the image produced by the
// first layer with 3 output channels.
void test_lsd(char *cfgfile, char *weightfile, char *filename)
{
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    srand(2222222);

    clock_t time;
    char buff[256];
    char *input = buff;
    // Image-producing layer index: first layer with out_c == 3.
    // Defaults to layer 0 if none is found.
    int i, imlayer = 0;
    for (i = 0; i < net.n; ++i) {
        if (net.layers[i].out_c == 3) {
            imlayer = i;
            printf("%d\n", i);
            break;
        }
    }

    while(1){
        if(filename){
            // BUG FIX: strncpy(input, filename, 256) left buff without a
            // terminating NUL when filename is >= 256 chars, making the
            // later printf undefined behavior.  Copy at most 255 chars and
            // terminate explicitly.
            strncpy(input, filename, 255);
            input[255] = 0;
        }else{
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;
            strtok(input, "\n");
        }
        // Resize so the short side matches net.w, then center-crop to the
        // network's input dimensions.
        image im = load_image_color(input, 0, 0);
        image resized = resize_min(im, net.w);
        image crop = crop_image(resized, (resized.w - net.w)/2, (resized.h - net.h)/2, net.w, net.h);
        //grayscale_image_3c(crop);

        float *X = crop.data;
        time=clock();
        network_predict(net, X);
        image out = get_network_image_layer(net, imlayer);
        //yuv_to_rgb(out);
        constrain_image(out);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        show_image(out, "out");
        show_image(crop, "crop");
        save_image(out, "out");
#ifdef OPENCV
        cvWaitKey(0);
#endif

        // "out" aliases the layer's output buffer, so only the locally
        // allocated images are freed here.
        free_image(im);
        free_image(resized);
        free_image(crop);
        if (filename) break;
    }
}
// Command-line dispatcher for the lsd subcommands.
// argv layout: darknet lsd <mode> <cfg> [weights] [acfg/filename] [aweights] ...
void run_lsd(int argc, char **argv)
{
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    int clear = find_arg(argc, argv, "-clear");
    char *cfg = argv[3];
    char *weights = (argc > 4) ? argv[4] : 0;
    char *filename = (argc > 5) ? argv[5] : 0;
    // BUG FIX: argv[5] was read unconditionally; with argc == 4 that reads
    // past the NULL that terminates argv (out-of-bounds access).  Guard it
    // like the other optional arguments.
    char *acfg = (argc > 5) ? argv[5] : 0;
    char *aweights = (argc > 6) ? argv[6] : 0;

    if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear);
    else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear);
    else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear);
    else if(0==strcmp(argv[2], "train3")){
        // BUG FIX: train3 consumes argv[3..8]; reject shorter command lines
        // instead of reading past the end of argv.
        if(argc > 8) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear);
        else fprintf(stderr, "usage: %s %s train3 [cfg1] [weights1] [cfg2] [weights2] [cfg3] [weights3]\n", argv[0], argv[1]);
    }
    else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename);
    /*
    else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights);
    */
}

View File

@ -1,5 +1,6 @@
#include "matrix.h"
#include "utils.h"
#include "blas.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -73,6 +74,20 @@ void matrix_add_matrix(matrix from, matrix to)
}
}
// Deep copy of a matrix: duplicates both the row-pointer table and every
// row buffer, so the result is independent of the source.
matrix copy_matrix(matrix m)
{
    int r;
    matrix out = {0};
    out.rows = m.rows;
    out.cols = m.cols;
    out.vals = calloc(out.rows, sizeof(float *));
    for(r = 0; r < out.rows; ++r){
        out.vals[r] = calloc(out.cols, sizeof(float));
        memcpy(out.vals[r], m.vals[r], out.cols * sizeof(float));
    }
    return out;
}
matrix make_matrix(int rows, int cols)
{
int i;

View File

@ -6,6 +6,7 @@ typedef struct matrix{
} matrix;
matrix make_matrix(int rows, int cols);
matrix copy_matrix(matrix m);
void free_matrix(matrix m);
void print_matrix(matrix m);

View File

@ -27,6 +27,35 @@
#include "dropout_layer.h"
#include "route_layer.h"
#include "shortcut_layer.h"
#include "parser.h"
#include "data.h"
// Build a zeroed load_args seeded with the network's input geometry and
// data-augmentation settings; callers fill in paths, counts, and type.
load_args get_base_args(network net)
{
    load_args a = {0};
    // Input geometry.
    a.w = net.w;
    a.h = net.h;
    a.size = net.w;
    // Crop bounds and augmentation ranges taken from the network config.
    a.min = net.min_crop;
    a.max = net.max_crop;
    a.angle = net.angle;
    a.aspect = net.aspect;
    a.exposure = net.exposure;
    a.saturation = net.saturation;
    a.hue = net.hue;
    return a;
}
// Parse a network config and optionally load weights into it.
// A NULL or empty weights path skips loading; clear resets the
// seen-images counter so training restarts from iteration 0.
network load_network(char *cfg, char *weights, int clear)
{
    network net = parse_network_cfg(cfg);
    if(weights && *weights) load_weights(&net, weights);
    if(clear){
        *net.seen = 0;
    }
    return net;
}
int get_current_batch(network net)
{
@ -50,6 +79,7 @@ float get_current_rate(network net)
int batch_num = get_current_batch(net);
int i;
float rate;
if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power);
switch (net.policy) {
case CONSTANT:
return net.learning_rate;
@ -66,7 +96,6 @@ float get_current_rate(network net)
case EXP:
return net.learning_rate * pow(net.gamma, batch_num);
case POLY:
if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power);
return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power);
case RANDOM:
return net.learning_rate * pow(rand_uniform(0,1), net.power);
@ -150,7 +179,7 @@ void forward_network(network net, network_state state)
state.index = i;
layer l = net.layers[i];
if(l.delta){
scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
}
l.forward(l, state);
state.input = l.output;
@ -165,7 +194,7 @@ void update_network(network net)
for(i = 0; i < net.n; ++i){
layer l = net.layers[i];
if(l.update){
l.update(l, update_batch, rate, net.momentum, net.decay);
l.update(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
}
}
}
@ -218,6 +247,7 @@ void backward_network(network net, network_state state)
state.delta = prev.delta;
}
layer l = net.layers[i];
if(l.stopbackward) break;
l.backward(l, state);
}
}
@ -414,6 +444,9 @@ detection_layer get_network_detection_layer(network net)
image get_network_image_layer(network net, int i)
{
layer l = net.layers[i];
#ifdef GPU
cuda_pull_array(l.output_gpu, l.output, l.outputs);
#endif
if (l.out_w && l.out_h && l.out_c){
return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
}

View File

@ -43,6 +43,7 @@ typedef struct network{
float eps;
int inputs;
int notruth;
int h, w, c;
int max_crop;
int min_crop;
@ -82,6 +83,7 @@ float *get_network_output_gpu(network net);
void forward_network_gpu(network net, network_state state);
void backward_network_gpu(network net, network_state state);
void update_network_gpu(network net);
void harmless_update_network_gpu(network net);
#endif
float get_current_rate(network net);
@ -121,6 +123,8 @@ int resize_network(network *net, int w, int h);
void set_batch_network(network *net, int b);
int get_network_input_size(network net);
float get_network_cost(network net);
network load_network(char *cfg, char *weights, int clear);
load_args get_base_args(network net);
int get_network_nuisance(network net);
int get_network_background(network net);

View File

@ -50,8 +50,10 @@ void forward_network_gpu(network net, network_state state)
if(l.delta_gpu){
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
}
//if(l.c ==3 && i > 5) state.input = *net.input_gpu;
l.forward_gpu(l, state);
state.input = l.output_gpu;
if(l.truth) state.truth = l.output_gpu;
}
}
@ -64,6 +66,7 @@ void backward_network_gpu(network net, network_state state)
for(i = net.n-1; i >= 0; --i){
state.index = i;
layer l = net.layers[i];
if(l.stopbackward) break;
if(i == 0){
state.input = original_input;
state.delta = original_delta;
@ -86,11 +89,18 @@ void update_network_gpu(network net)
layer l = net.layers[i];
l.t = get_current_batch(net);
if(l.update_gpu){
l.update_gpu(l, update_batch, rate, net.momentum, net.decay);
l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
}
}
}
// Run an update pass that leaves the weights unchanged: the learning rate
// is forced to 0 so each layer's update contributes nothing, while the
// per-layer update machinery (l.t bookkeeping etc.) still runs.
// NOTE(review): momentum = 1 preserves rather than decays any accumulated
// momentum buffers -- confirm this is the intended "harmless" semantics
// for every layer type.
void harmless_update_network_gpu(network net)
{
// net is passed by value, so these overrides do not leak to the caller.
net.learning_rate = 0;
net.momentum = 1;
update_network_gpu(net);
}
void forward_backward_network_gpu(network net, float *x, float *y)
{
network_state state;
@ -101,10 +111,10 @@ void forward_backward_network_gpu(network net, float *x, float *y)
if(net.layers[net.n-1].truths) y_size = net.layers[net.n-1].truths*net.batch;
if(!*net.input_gpu){
*net.input_gpu = cuda_make_array(x, x_size);
*net.truth_gpu = cuda_make_array(y, y_size);
if(!net.notruth) *net.truth_gpu = cuda_make_array(y, y_size);
}else{
cuda_push_array(*net.input_gpu, x, x_size);
cuda_push_array(*net.truth_gpu, y, y_size);
if(!net.notruth) cuda_push_array(*net.truth_gpu, y, y_size);
}
state.input = *net.input_gpu;
state.delta = 0;
@ -180,7 +190,7 @@ void update_layer(layer l, network net)
float rate = get_current_rate(net);
l.t = get_current_batch(net);
if(l.update_gpu){
l.update_gpu(l, update_batch, rate, net.momentum, net.decay);
l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
}
}

View File

@ -52,6 +52,7 @@ void optimize_picture(network *net, image orig, int max_layer, float scale, floa
image delta = make_image(im.w, im.h, im.c);
network_state state = {0};
state.net = *net;
#ifdef GPU
state.input = cuda_make_array(im.data, im.w*im.h*im.c);
@ -142,6 +143,7 @@ void reconstruct_picture(network net, float *features, image recon, image update
image delta = make_image(recon.w, recon.h, recon.c);
network_state state = {0};
state.net = net;
#ifdef GPU
state.input = cuda_make_array(recon.data, recon.w*recon.h*recon.c);
state.delta = cuda_make_array(delta.data, delta.w*delta.h*delta.c);
@ -178,6 +180,113 @@ void reconstruct_picture(network net, float *features, image recon, image update
}
}
/*
void run_lsd(int argc, char **argv)
{
srand(0);
if(argc < 3){
fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]);
return;
}
char *cfg = argv[2];
char *weights = argv[3];
char *input = argv[4];
int norm = find_int_arg(argc, argv, "-norm", 1);
int rounds = find_int_arg(argc, argv, "-rounds", 1);
int iters = find_int_arg(argc, argv, "-iters", 10);
float rate = find_float_arg(argc, argv, "-rate", .04);
float momentum = find_float_arg(argc, argv, "-momentum", .9);
float lambda = find_float_arg(argc, argv, "-lambda", .01);
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
int reconstruct = find_arg(argc, argv, "-reconstruct");
int smooth_size = find_int_arg(argc, argv, "-smooth", 1);
network net = parse_network_cfg(cfg);
load_weights(&net, weights);
char *cfgbase = basecfg(cfg);
char *imbase = basecfg(input);
set_batch_network(&net, 1);
image im = load_image_color(input, 0, 0);
float *features = 0;
image update;
if (reconstruct){
im = letterbox_image(im, net.w, net.h);
int zz = 0;
network_predict(net, im.data);
image out_im = get_network_image(net);
image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz);
//flip_image(crop);
image f_im = resize_image(crop, out_im.w, out_im.h);
free_image(crop);
printf("%d features\n", out_im.w*out_im.h*out_im.c);
im = resize_image(im, im.w, im.h);
f_im = resize_image(f_im, f_im.w, f_im.h);
features = f_im.data;
int i;
for(i = 0; i < 14*14*512; ++i){
features[i] += rand_uniform(-.19, .19);
}
free_image(im);
im = make_random_image(im.w, im.h, im.c);
update = make_image(im.w, im.h, im.c);
}
int e;
int n;
for(e = 0; e < rounds; ++e){
fprintf(stderr, "Iteration: ");
fflush(stderr);
for(n = 0; n < iters; ++n){
fprintf(stderr, "%d, ", n);
fflush(stderr);
if(reconstruct){
reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
//if ((n+1)%30 == 0) rate *= .5;
show_image(im, "reconstruction");
#ifdef OPENCV
cvWaitKey(10);
#endif
}else{
int layer = max_layer + rand()%range - range/2;
int octave = rand()%octaves;
optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
}
}
fprintf(stderr, "done\n");
char buff[256];
if (prefix){
sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e);
}else{
sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e);
}
printf("%d %s\n", e, buff);
save_image(im, buff);
//show_image(im, buff);
//cvWaitKey(0);
if(rotate){
image rot = rotate_image(im, rotate);
free_image(im);
im = rot;
}
image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom);
image resized = resize_image(crop, im.w, im.h);
free_image(im);
free_image(crop);
im = resized;
}
}
*/
void run_nightmare(int argc, char **argv)
{
@ -224,6 +333,7 @@ void run_nightmare(int argc, char **argv)
free_image(im);
im = resized;
}
im = letterbox_image(im, net.w, net.h);
float *features = 0;
image update;
@ -246,13 +356,11 @@ void run_nightmare(int argc, char **argv)
int i;
for(i = 0; i < 14*14*512; ++i){
features[i] += rand_uniform(-.19, .19);
//features[i] += rand_uniform(-.19, .19);
}
free_image(im);
im = make_random_image(im.w, im.h, im.c);
update = make_image(im.w, im.h, im.c);
}
int e;

View File

@ -9,6 +9,7 @@
#include "batchnorm_layer.h"
#include "blas.h"
#include "connected_layer.h"
#include "deconvolutional_layer.h"
#include "convolutional_layer.h"
#include "cost_layer.h"
#include "crnn_layer.h"
@ -48,6 +49,8 @@ LAYER_TYPE string_to_layer_type(char * type)
if (strcmp(type, "[local]")==0) return LOCAL;
if (strcmp(type, "[conv]")==0
|| strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
if (strcmp(type, "[deconv]")==0
|| strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
if (strcmp(type, "[activation]")==0) return ACTIVE;
if (strcmp(type, "[net]")==0
|| strcmp(type, "[network]")==0) return NETWORK;
@ -135,6 +138,29 @@ local_layer parse_local(list *options, size_params params)
return layer;
}
// Build a deconvolutional layer from a [deconvolutional] config section.
// The preceding layer must produce an image (h, w, c all non-zero).
layer parse_deconvolutional(list *options, size_params params)
{
    int h = params.h;
    int w = params.w;
    int c = params.c;
    int batch = params.batch;
    if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");

    // Layer hyperparameters, with the same defaults as the other
    // convolution-style parsers.
    int filters = option_find_int(options, "filters",1);
    int ksize = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);
    int bn = option_find_int_quiet(options, "batch_normalize", 0);
    ACTIVATION activation = get_activation(option_find_str(options, "activation", "logistic"));

    return make_deconvolutional_layer(batch, h, w, c, filters, ksize, stride, activation, bn);
}
convolutional_layer parse_convolutional(list *options, size_params params)
{
int n = option_find_int(options, "filters",1);
@ -312,6 +338,7 @@ cost_layer parse_cost(list *options, size_params params)
float scale = option_find_float_quiet(options, "scale",1);
cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
layer.ratio = option_find_float_quiet(options, "ratio",0);
layer.thresh = option_find_float_quiet(options, "thresh",0);
return layer;
}
@ -343,6 +370,8 @@ layer parse_reorg(list *options, size_params params)
{
int stride = option_find_int(options, "stride",1);
int reverse = option_find_int_quiet(options, "reverse",0);
int flatten = option_find_int_quiet(options, "flatten",0);
int extra = option_find_int_quiet(options, "extra",0);
int batch,h,w,c;
h = params.h;
@ -351,7 +380,7 @@ layer parse_reorg(list *options, size_params params)
batch=params.batch;
if(!(h && w && c)) error("Layer before reorg layer must output image.");
layer layer = make_reorg_layer(batch,w,h,c,stride,reverse);
layer layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra);
return layer;
}
@ -508,6 +537,7 @@ void parse_net_options(list *options, network *net)
net->decay = option_find_float(options, "decay", .0001);
int subdivs = option_find_int(options, "subdivisions",1);
net->time_steps = option_find_int_quiet(options, "time_steps",1);
net->notruth = option_find_int_quiet(options, "notruth",0);
net->batch /= subdivs;
net->batch *= net->time_steps;
net->subdivisions = subdivs;
@ -537,6 +567,7 @@ void parse_net_options(list *options, network *net)
char *policy_s = option_find_str(options, "policy", "constant");
net->policy = get_policy(policy_s);
net->burn_in = option_find_int_quiet(options, "burn_in", 0);
net->power = option_find_float_quiet(options, "power", 4);
if(net->policy == STEP){
net->step = option_find_int(options, "step", 1);
net->scale = option_find_float(options, "scale", 1);
@ -570,7 +601,6 @@ void parse_net_options(list *options, network *net)
net->gamma = option_find_float(options, "gamma", 1);
net->step = option_find_int(options, "step", 1);
} else if (net->policy == POLY || net->policy == RANDOM){
net->power = option_find_float(options, "power", 1);
}
net->max_batches = option_find_int(options, "max_batches", 0);
}
@ -617,6 +647,8 @@ network parse_network_cfg(char *filename)
LAYER_TYPE lt = string_to_layer_type(s->type);
if(lt == CONVOLUTIONAL){
l = parse_convolutional(options, params);
}else if(lt == DECONVOLUTIONAL){
l = parse_deconvolutional(options, params);
}else if(lt == LOCAL){
l = parse_local(options, params);
}else if(lt == ACTIVE){
@ -665,8 +697,13 @@ network parse_network_cfg(char *filename)
}else{
fprintf(stderr, "Type not recognized: %s\n", s->type);
}
l.truth = option_find_int_quiet(options, "truth", 0);
l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
l.dontload = option_find_int_quiet(options, "dontload", 0);
l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
l.smooth = option_find_float_quiet(options, "smooth", 0);
option_unused(options);
net.layers[count] = l;
if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
@ -840,7 +877,7 @@ void save_weights_upto(network net, char *filename, int cutoff)
int i;
for(i = 0; i < net.n && i < cutoff; ++i){
layer l = net.layers[i];
if(l.type == CONVOLUTIONAL){
if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
save_convolutional_weights(l, fp);
} if(l.type == CONNECTED){
save_connected_weights(l, fp);
@ -1005,7 +1042,7 @@ void load_convolutional_weights(layer l, FILE *fp)
}
void load_weights_upto(network *net, char *filename, int cutoff)
void load_weights_upto(network *net, char *filename, int start, int cutoff)
{
#ifdef GPU
if(net->gpu_index >= 0){
@ -1027,10 +1064,10 @@ void load_weights_upto(network *net, char *filename, int cutoff)
int transpose = (major > 1000) || (minor > 1000);
int i;
for(i = 0; i < net->n && i < cutoff; ++i){
for(i = start; i < net->n && i < cutoff; ++i){
layer l = net->layers[i];
if (l.dontload) continue;
if(l.type == CONVOLUTIONAL){
if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
load_convolutional_weights(l, fp);
}
if(l.type == CONNECTED){
@ -1075,6 +1112,6 @@ void load_weights_upto(network *net, char *filename, int cutoff)
void load_weights(network *net, char *filename)
{
load_weights_upto(net, filename, net->n);
load_weights_upto(net, filename, 0, net->n);
}

View File

@ -8,6 +8,6 @@ void save_weights(network net, char *filename);
void save_weights_upto(network net, char *filename, int cutoff);
void save_weights_double(network net, char *filename);
void load_weights(network *net, char *filename);
void load_weights_upto(network *net, char *filename, int cutoff);
void load_weights_upto(network *net, char *filename, int start, int cutoff);
#endif

View File

@ -18,6 +18,10 @@ layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
l.batch = batch;
l.h = h;
l.w = w;
l.c = n*(classes + coords + 1);
l.out_w = l.w;
l.out_h = l.h;
l.out_c = l.c;
l.classes = classes;
l.coords = coords;
l.cost = calloc(1, sizeof(float));
@ -68,19 +72,19 @@ void resize_region_layer(layer *l, int w, int h)
#endif
}
box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h)
box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
{
box b;
b.x = (i + logistic_activate(x[index + 0])) / w;
b.y = (j + logistic_activate(x[index + 1])) / h;
b.w = exp(x[index + 2]) * biases[2*n] / w;
b.h = exp(x[index + 3]) * biases[2*n+1] / h;
b.x = (i + x[index + 0*stride]) / w;
b.y = (j + x[index + 1*stride]) / h;
b.w = exp(x[index + 2*stride]) * biases[2*n] / w;
b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h;
return b;
}
float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale)
float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride)
{
box pred = get_region_box(x, biases, n, index, i, j, w, h);
box pred = get_region_box(x, biases, n, index, i, j, w, h, stride);
float iou = box_iou(pred, truth);
float tx = (truth.x*w - i);
@ -88,34 +92,34 @@ float delta_region_box(box truth, float *x, float *biases, int n, int index, int
float tw = log(truth.w*w / biases[2*n]);
float th = log(truth.h*h / biases[2*n + 1]);
delta[index + 0] = scale * (tx - logistic_activate(x[index + 0])) * logistic_gradient(logistic_activate(x[index + 0]));
delta[index + 1] = scale * (ty - logistic_activate(x[index + 1])) * logistic_gradient(logistic_activate(x[index + 1]));
delta[index + 2] = scale * (tw - x[index + 2]);
delta[index + 3] = scale * (th - x[index + 3]);
delta[index + 0*stride] = scale * (tx - x[index + 0*stride]);
delta[index + 1*stride] = scale * (ty - x[index + 1*stride]);
delta[index + 2*stride] = scale * (tw - x[index + 2*stride]);
delta[index + 3*stride] = scale * (th - x[index + 3*stride]);
return iou;
}
void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, float *avg_cat)
void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat)
{
int i, n;
if(hier){
float pred = 1;
while(class >= 0){
pred *= output[index + class];
pred *= output[index + stride*class];
int g = hier->group[class];
int offset = hier->group_offset[g];
for(i = 0; i < hier->group_size[g]; ++i){
delta[index + offset + i] = scale * (0 - output[index + offset + i]);
delta[index + stride*(offset + i)] = scale * (0 - output[index + stride*(offset + i)]);
}
delta[index + class] = scale * (1 - output[index + class]);
delta[index + stride*class] = scale * (1 - output[index + stride*class]);
class = hier->parent[class];
}
*avg_cat += pred;
} else {
for(n = 0; n < classes; ++n){
delta[index + n] = scale * (((n == class)?1 : 0) - output[index + n]);
if(n == class) *avg_cat += output[index + n];
delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]);
if(n == class) *avg_cat += output[index + stride*n];
}
}
}
@ -130,42 +134,35 @@ float tisnan(float x)
return (x != x);
}
// Flat index into the region layer's output for (batch, location, entry).
// location encodes anchor and spatial cell as n*(w*h) + cell; each batch
// holds l.n blocks of (coords + classes + 1) channel planes, each plane
// l.w*l.h values wide, and entry selects the plane within a block.
int entry_index(layer l, int batch, int location, int entry)
{
    int area = l.w*l.h;
    int n = location / area;
    int cell = location % area;
    return batch*l.outputs + (n*(l.coords + l.classes + 1) + entry)*area + cell;
}
void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output);
void forward_region_layer(const layer l, network_state state)
{
int i,j,b,t,n;
int size = l.coords + l.classes + 1;
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
#ifndef GPU
flatten(l.output, l.w*l.h, size*l.n, l.batch, 1);
#endif
for (b = 0; b < l.batch; ++b){
for(i = 0; i < l.h*l.w*l.n; ++i){
int index = size*i + b*l.outputs;
l.output[index + 4] = logistic_activate(l.output[index + 4]);
}
}
#ifndef GPU
if (l.softmax_tree){
for (b = 0; b < l.batch; ++b){
for(i = 0; i < l.h*l.w*l.n; ++i){
int index = size*i + b*l.outputs;
softmax_tree(l.output + index + 5, 1, 0, 1, l.softmax_tree, l.output + index + 5);
}
int i;
int count = 5;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_cpu(state.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count);
count += group_size;
}
} else if (l.softmax){
for (b = 0; b < l.batch; ++b){
for(i = 0; i < l.h*l.w*l.n; ++i){
int index = size*i + b*l.outputs;
softmax(l.output + index + 5, l.classes, 1, l.output + index + 5);
}
}
softmax_cpu(state.input + 5, l.classes, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + 5);
}
#endif
if(!state.train) return;
memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
if(!state.train) return;
float avg_iou = 0;
float recall = 0;
float avg_cat = 0;
@ -178,26 +175,28 @@ void forward_region_layer(const layer l, network_state state)
if(l.softmax_tree){
int onlyclass = 0;
for(t = 0; t < 30; ++t){
box truth = float_to_box(state.truth + t*5 + b*l.truths);
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
if(!truth.x) break;
int class = state.truth[t*5 + b*l.truths + 4];
float maxp = 0;
int maxi = 0;
if(truth.x > 100000 && truth.y > 100000){
for(n = 0; n < l.n*l.w*l.h; ++n){
int index = size*n + b*l.outputs + 5;
float scale = l.output[index-1];
l.delta[index - 1] = l.noobject_scale * ((0 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class);
int class_index = entry_index(l, b, n, 5);
int obj_index = entry_index(l, b, n, 4);
float scale = l.output[obj_index];
l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]);
float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h);
if(p > maxp){
maxp = p;
maxi = n;
}
}
int index = size*maxi + b*l.outputs + 5;
delta_region_class(l.output, l.delta, index, class, l.classes, l.softmax_tree, l.class_scale, &avg_cat);
if(l.output[index - 1] < .3) l.delta[index - 1] = l.object_scale * ((.3 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
else l.delta[index - 1] = 0;
int class_index = entry_index(l, b, maxi, 5);
int obj_index = entry_index(l, b, maxi, 4);
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]);
else l.delta[obj_index] = 0;
++class_count;
onlyclass = 1;
break;
@ -208,21 +207,22 @@ void forward_region_layer(const layer l, network_state state)
for (j = 0; j < l.h; ++j) {
for (i = 0; i < l.w; ++i) {
for (n = 0; n < l.n; ++n) {
int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
float best_iou = 0;
for(t = 0; t < 30; ++t){
box truth = float_to_box(state.truth + t*5 + b*l.truths);
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
if(!truth.x) break;
float iou = box_iou(pred, truth);
if (iou > best_iou) {
best_iou = iou;
}
}
avg_anyobj += l.output[index + 4];
l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4);
avg_anyobj += l.output[obj_index];
l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]);
if (best_iou > l.thresh) {
l.delta[index + 4] = 0;
l.delta[obj_index] = 0;
}
if(*(state.net.seen) < 12800){
@ -231,17 +231,16 @@ void forward_region_layer(const layer l, network_state state)
truth.y = (j + .5)/l.h;
truth.w = l.biases[2*n]/l.w;
truth.h = l.biases[2*n+1]/l.h;
delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01);
delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h);
}
}
}
}
for(t = 0; t < 30; ++t){
box truth = float_to_box(state.truth + t*5 + b*l.truths);
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
if(!truth.x) break;
float best_iou = 0;
int best_index = 0;
int best_n = 0;
i = (truth.x * l.w);
j = (truth.y * l.h);
@ -251,8 +250,8 @@ void forward_region_layer(const layer l, network_state state)
truth_shift.y = 0;
//printf("index %d %d\n",i, j);
for(n = 0; n < l.n; ++n){
int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
if(l.bias_match){
pred.w = l.biases[2*n]/l.w;
pred.h = l.biases[2*n+1]/l.h;
@ -262,80 +261,118 @@ void forward_region_layer(const layer l, network_state state)
pred.y = 0;
float iou = box_iou(pred, truth_shift);
if (iou > best_iou){
best_index = index;
best_iou = iou;
best_n = n;
}
}
//printf("%d %f (%f, %f) %f x %f\n", best_n, best_iou, truth.x, truth.y, truth.w, truth.h);
float iou = delta_region_box(truth, l.output, l.biases, best_n, best_index, i, j, l.w, l.h, l.delta, l.coord_scale);
int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0);
float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h);
if(iou > .5) recall += 1;
avg_iou += iou;
//l.delta[best_index + 4] = iou - l.output[best_index + 4];
avg_obj += l.output[best_index + 4];
l.delta[best_index + 4] = l.object_scale * (1 - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4);
avg_obj += l.output[obj_index];
l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]);
if (l.rescore) {
l.delta[best_index + 4] = l.object_scale * (iou - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]);
}
int class = state.truth[t*5 + b*l.truths + 4];
if (l.map) class = l.map[class];
delta_region_class(l.output, l.delta, best_index + 5, class, l.classes, l.softmax_tree, l.class_scale, &avg_cat);
int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 5);
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
++count;
++class_count;
}
}
//printf("\n");
#ifndef GPU
flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0);
#endif
*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
}
/*
 * Backward pass (CPU) for the region layer.
 *
 * The forward pass already stored the full gradient in l.delta, so the
 * backward pass only needs to accumulate it into the previous layer's
 * delta buffer (state.delta += 1 * l.delta).
 */
void backward_region_layer(const layer l, network_state state)
{
    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
    /*
    // Old implementation, kept for reference: it re-applied the logistic
    // gradient to the objectness channel here instead of during forward.
    int b;
    int size = l.coords + l.classes + 1;
    for (b = 0; b < l.batch*l.n; ++b){
        int index = (b*size + 4)*l.w*l.h;
        gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index);
    }
    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
    */
}
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh)
{
int i,j,n;
int i,j,n,z;
float *predictions = l.output;
if (l.batch == 2) {
float *flip = l.output + l.outputs;
for (j = 0; j < l.h; ++j) {
for (i = 0; i < l.w/2; ++i) {
for (n = 0; n < l.n; ++n) {
for(z = 0; z < l.classes + 5; ++z){
int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i;
int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1);
float swap = flip[i1];
flip[i1] = flip[i2];
flip[i2] = swap;
if(z == 0){
flip[i1] = -flip[i1];
flip[i2] = -flip[i2];
}
}
}
}
}
for(i = 0; i < l.outputs; ++i){
l.output[i] = (l.output[i] + flip[i])/2.;
}
}
for (i = 0; i < l.w*l.h; ++i){
int row = i / l.w;
int col = i % l.w;
for(n = 0; n < l.n; ++n){
int index = i*l.n + n;
int p_index = index * (l.classes + 5) + 4;
float scale = predictions[p_index];
int box_index = index * (l.classes + 5);
boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h);
int index = n*l.w*l.h + i;
int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
float scale = predictions[obj_index];
boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h);
if(1){
int max = w > h ? w : h;
boxes[index].x = (boxes[index].x - (max - w)/2./max) / ((float)w/max);
boxes[index].y = (boxes[index].y - (max - h)/2./max) / ((float)h/max);
boxes[index].w *= (float)max/w;
boxes[index].h *= (float)max/h;
}
boxes[index].x *= w;
boxes[index].y *= h;
boxes[index].w *= w;
boxes[index].h *= h;
int class_index = index * (l.classes + 5) + 5;
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5);
if(l.softmax_tree){
hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);
hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h);
if(map){
for(j = 0; j < 200; ++j){
float prob = scale*predictions[class_index+map[j]];
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5 + map[j]);
float prob = scale*predictions[class_index];
probs[index][j] = (prob > thresh) ? prob : 0;
}
} else {
int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh);
int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h);
probs[index][j] = (scale > thresh) ? scale : 0;
probs[index][l.classes] = scale;
}
} else {
for(j = 0; j < l.classes; ++j){
float prob = scale*predictions[class_index+j];
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5 + j);
float prob = scale*predictions[class_index];
probs[index][j] = (prob > thresh) ? prob : 0;
}
}
@ -350,23 +387,33 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
void forward_region_layer_gpu(const layer l, network_state state)
{
/*
if(!state.train){
copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
return;
}
*/
flatten_ongpu(state.input, l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch, 1, l.output_gpu);
if(l.softmax_tree){
copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
int b, n;
for (b = 0; b < l.batch; ++b){
for(n = 0; n < l.n; ++n){
int index = entry_index(l, b, n*l.w*l.h, 0);
activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC);
index = entry_index(l, b, n*l.w*l.h, 4);
activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC);
}
}
if (l.softmax_tree){
int i;
int count = 5;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_gpu(l.output_gpu+count, group_size, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + count);
int index = entry_index(l, 0, 0, count);
softmax_gpu(state.input + index, group_size, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
count += group_size;
}
}else if (l.softmax){
softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5);
} else if (l.softmax) {
int index = entry_index(l, 0, 0, 5);
//printf("%d\n", index);
softmax_gpu(state.input + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
}
if(!state.train || l.onlyforward){
cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
return;
}
float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
@ -382,16 +429,25 @@ void forward_region_layer_gpu(const layer l, network_state state)
cpu_state.truth = truth_cpu;
cpu_state.input = in_cpu;
forward_region_layer(l, cpu_state);
//cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
free(cpu_state.input);
if(!state.train) return;
cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
if(cpu_state.truth) free(cpu_state.truth);
}
/*
 * Backward pass (GPU) for the region layer.
 *
 * Folds the logistic-activation gradient into the deltas for the channels
 * that were logistic-activated in the forward pass (x,y coordinates and
 * objectness), then accumulates the result into the previous layer's
 * delta buffer.
 *
 * Fix: removed a stale leftover call to flatten_ongpu() from the old
 * flattened-layout implementation; it overwrote state.delta with a
 * differently-laid-out copy of l.delta before the gradient/axpy path ran,
 * corrupting the propagated gradient. Also removed the duplicated
 * non-const prototype line (diff residue).
 */
void backward_region_layer_gpu(const layer l, network_state state)
{
    int b, n;
    for (b = 0; b < l.batch; ++b){
        for (n = 0; n < l.n; ++n){
            // x,y predictions (2 channels of l.w*l.h each) went through a
            // logistic activation in the forward pass.
            int index = entry_index(l, b, n*l.w*l.h, 0);
            gradient_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index);
            // Objectness channel is also logistic-activated.
            index = entry_index(l, b, n*l.w*l.h, 4);
            gradient_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index);
        }
    }
    // state.delta += 1 * l.delta_gpu
    axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1);
}
#endif

261
src/regressor.c Normal file
View File

@ -0,0 +1,261 @@
#include "network.h"
#include "utils.h"
#include "parser.h"
#include "option_list.h"
#include "blas.h"
#include "assert.h"
#include "cuda.h"
#include <sys/time.h>
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
image get_image_from_stream(CvCapture *cap);
#endif
/*
 * Train a regression network, optionally across multiple GPUs.
 *
 * datacfg    - data config file; provides "train" (image list) and "backup"
 *              (checkpoint directory) keys.
 * cfgfile    - network architecture config.
 * weightfile - optional initial weights (NULL to train from scratch).
 * gpus/ngpus - GPU ids and count; one network replica is built per GPU.
 * clear      - nonzero resets the "seen images" counter (restarts schedule).
 *
 * Checkpoints: <backup>/<base>.backup every 100 batches,
 * <backup>/<base>_<epoch>.weights at each epoch boundary, and a final
 * <backup>/<base>.weights on exit.
 */
void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
    int i;
    float avg_loss = -1;    // exponential moving average of loss; -1 = not yet initialized
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
    network *nets = calloc(ngpus, sizeof(network));

    srand(time(0));
    int seed = rand();
    for(i = 0; i < ngpus; ++i){
        // Re-seed with the same value so every replica is initialized with
        // identical random weights before training starts.
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = parse_network_cfg(cfgfile);
        if(weightfile){
            load_weights(&nets[i], weightfile);
        }
        if(clear) *nets[i].seen = 0;
        // Scale learning rate with replica count (effective batch grows with ngpus).
        nets[i].learning_rate *= ngpus;
    }
    srand(time(0));
    network net = nets[0];

    // Images consumed per training iteration across all replicas.
    int imgs = net.batch * net.subdivisions * ngpus;

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    list *options = read_data_cfg(datacfg);

    char *backup_directory = option_find_str(options, "backup", "/backup/");
    char *train_list = option_find_str(options, "train", "data/train.list");

    list *plist = get_paths(train_list);
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;    // total number of training images
    clock_t time;

    // Data-loader configuration: crop/augmentation parameters come from the
    // network config; 32 loader threads fill `buffer` asynchronously.
    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.threads = 32;

    args.min = net.min_crop;
    args.max = net.max_crop;
    args.angle = net.angle;
    args.aspect = net.aspect;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;
    args.size = net.w;

    args.paths = paths;
    args.n = imgs;
    args.m = N;
    args.type = REGRESSION_DATA;

    data train;
    data buffer;
    pthread_t load_thread;
    args.d = &buffer;
    load_thread = load_data(args);

    int epoch = (*net.seen)/N;
    // Train until max_batches is reached; max_batches == 0 means train forever.
    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
        time=clock();

        // Swap in the batch loaded in the background, then immediately kick
        // off loading of the next one so I/O overlaps with training.
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));
        time=clock();

        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            // 4 = sync interval between replicas (NOTE(review): confirm
            // semantics against train_networks).
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
        free_data(train);
        // Epoch boundary: save a numbered snapshot.
        if(*net.seen/N > epoch){
            epoch = *net.seen/N;
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
        }
        // Rolling backup every 100 batches.
        if(get_current_batch(net)%100 == 0){
            char buff[256];
            sprintf(buff, "%s/%s.backup",backup_directory,base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);

    // NOTE(review): only nets[0] is freed; the remaining replicas and the
    // `nets` array itself leak — acceptable at process exit, but confirm.
    free_network(net);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
}
/*
 * Run a trained regression network on single images.
 *
 * cfgfile    - network architecture config.
 * weightfile - optional weights file.
 * filename   - image path; if NULL, paths are read interactively from stdin
 *              until EOF, otherwise the single image is processed and the
 *              function returns.
 *
 * Prints the first predicted value and the inference time for each image.
 */
void predict_regressor(char *cfgfile, char *weightfile, char *filename)
{
    network net = parse_network_cfg(cfgfile);
    if (weightfile) load_weights(&net, weightfile);
    set_batch_network(&net, 1);
    srand(2222222);

    char path_buf[256];
    char *path = path_buf;
    clock_t start;

    for (;;) {
        if (filename) {
            // NOTE(review): strncpy does not NUL-terminate when the source
            // fills the buffer — assumes filename is shorter than 256.
            strncpy(path, filename, 256);
        } else {
            printf("Enter Image Path: ");
            fflush(stdout);
            path = fgets(path, 256, stdin);
            if (!path) return;      // EOF: stop the prompt loop
            strtok(path, "\n");     // strip trailing newline
        }

        image im = load_image_color(path, 0, 0);
        image sized = letterbox_image(im, net.w, net.h);

        start = clock();
        float *predictions = network_predict(net, sized.data);
        printf("Predicted: %f\n", predictions[0]);
        printf("%s: Predicted in %f seconds.\n", path, sec(clock()-start));

        free_image(im);
        free_image(sized);
        if (filename) break;        // single-image mode: done
    }
}
/*
 * Live regression demo: grabs frames from a webcam or video file, runs the
 * network on each letterboxed frame, and prints the prediction plus an FPS
 * estimate (ANSI escape codes clear the terminal between frames).
 *
 * datacfg   - unused here (kept for a uniform subcommand signature).
 * cfgfile   - network architecture config.
 * weightfile- optional weights file.
 * cam_index - camera index used when filename is NULL.
 * filename  - optional video file to read instead of the camera.
 *
 * Fixes: FPS was computed from tv_usec alone, which divides by zero when a
 * frame time lands on an exact second and is wrong for frames >= 1 s; the
 * full elapsed time (tv_sec + tv_usec) is used now. Removed unused local i.
 * Compiled to a no-op without OPENCV.
 */
void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
    printf("Regressor Demo\n");
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);

    srand(2222222);
    CvCapture * cap;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow("Regressor", CV_WINDOW_NORMAL);
    cvResizeWindow("Regressor", 512, 512);
    float fps = 0;      // exponential moving average of instantaneous FPS

    while(1){
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        image in_s = letterbox_image(in, net.w, net.h);
        show_image(in, "Regressor");

        float *predictions = network_predict(net, in_s.data);

        printf("\033[2J");
        printf("\033[1;1H");
        printf("\nFPS:%.0f\n",fps);
        printf("People: %f\n", predictions[0]);

        free_image(in_s);
        free_image(in);

        cvWaitKey(10);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        // Use the full elapsed time, not just the microsecond remainder.
        long elapsed_us = (long)tval_result.tv_sec * 1000000L + tval_result.tv_usec;
        if(elapsed_us > 0){
            float curr = 1000000.f/elapsed_us;
            fps = .9f*fps + .1f*curr;
        }
    }
#endif
}
/*
 * Command-line dispatcher for the regressor subcommands.
 *
 * argv layout: <prog> regressor <train|test|demo> <datacfg> <cfg> [weights] [file]
 * Optional flags: -gpus <comma list>, -c <cam index>, -clear.
 *
 * Fixes: the "test" branch passed (data, cfg, weights) into
 * predict_regressor(cfgfile, weightfile, filename), shifting every argument
 * by one so the data config was parsed as a network config; it now passes
 * (cfg, weights, filename). The usage string advertised an unimplemented
 * "valid" subcommand instead of "demo". The calloc'd gpus array is freed.
 */
void run_regressor(int argc, char **argv)
{
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/demo] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    // Parse "-gpus 0,1,2" into an array of device ids; default to the
    // globally selected gpu_index when the flag is absent.
    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    int *gpus = 0;
    int gpu = 0;
    int ngpus = 0;
    if(gpu_list){
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        ngpus = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus = calloc(ngpus, sizeof(int));
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            gpu_list = strchr(gpu_list, ',')+1;
        }
    } else {
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int clear = find_arg(argc, argv, "-clear");

    char *data = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) predict_regressor(cfg, weights, filename);
    else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear);
    else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename);

    // Only the -gpus path heap-allocates; the fallback points at a local.
    if(gpu_list) free(gpus);
}

View File

@ -4,15 +4,17 @@
#include <stdio.h>
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra)
{
layer l = {0};
l.type = REORG;
l.batch = batch;
l.stride = stride;
l.extra = extra;
l.h = h;
l.w = w;
l.c = c;
l.flatten = flatten;
if(reverse){
l.out_w = w*stride;
l.out_h = h*stride;
@ -23,10 +25,20 @@ layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
l.out_c = c*(stride*stride);
}
l.reverse = reverse;
fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c);
l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = h*w*c;
int output_size = l.out_h * l.out_w * l.out_c * batch;
if(l.extra){
l.out_w = l.out_h = l.out_c = 0;
l.outputs = l.inputs + l.extra;
}
if(extra){
fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, l.outputs);
} else {
fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c);
}
int output_size = l.outputs * batch;
l.output = calloc(output_size, sizeof(float));
l.delta = calloc(output_size, sizeof(float));
@ -77,17 +89,41 @@ void resize_reorg_layer(layer *l, int w, int h)
void forward_reorg_layer(const layer l, network_state state)
{
if(l.reverse){
int i;
if(l.flatten){
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
if(l.reverse){
flatten(l.output, l.w*l.h, l.c, l.batch, 0);
}else{
flatten(l.output, l.w*l.h, l.c, l.batch, 1);
}
} else if (l.extra) {
for(i = 0; i < l.batch; ++i){
copy_cpu(l.inputs, state.input + i*l.inputs, 1, l.output + i*l.outputs, 1);
}
} else if (l.reverse){
reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output);
}else {
} else {
reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output);
}
}
void backward_reorg_layer(const layer l, network_state state)
{
if(l.reverse){
int i;
if(l.flatten){
memcpy(state.delta, l.delta, l.outputs*l.batch*sizeof(float));
if(l.reverse){
flatten(state.delta, l.w*l.h, l.c, l.batch, 1);
}else{
flatten(state.delta, l.w*l.h, l.c, l.batch, 0);
}
} else if(l.reverse){
reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta);
} else if (l.extra) {
for(i = 0; i < l.batch; ++i){
copy_cpu(l.inputs, l.delta + i*l.outputs, 1, state.delta + i*l.inputs, 1);
}
}else{
reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta);
}
@ -96,7 +132,18 @@ void backward_reorg_layer(const layer l, network_state state)
#ifdef GPU
void forward_reorg_layer_gpu(layer l, network_state state)
{
if(l.reverse){
int i;
if(l.flatten){
if(l.reverse){
flatten_ongpu(state.input, l.w*l.h, l.c, l.batch, 0, l.output_gpu);
}else{
flatten_ongpu(state.input, l.w*l.h, l.c, l.batch, 1, l.output_gpu);
}
} else if (l.extra) {
for(i = 0; i < l.batch; ++i){
copy_ongpu(l.inputs, state.input + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1);
}
} else if (l.reverse) {
reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu);
}else {
reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu);
@ -105,9 +152,20 @@ void forward_reorg_layer_gpu(layer l, network_state state)
void backward_reorg_layer_gpu(layer l, network_state state)
{
if(l.reverse){
if(l.flatten){
if(l.reverse){
flatten_ongpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, state.delta);
}else{
flatten_ongpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, state.delta);
}
} else if (l.extra) {
int i;
for(i = 0; i < l.batch; ++i){
copy_ongpu(l.inputs, l.delta_gpu + i*l.outputs, 1, state.delta + i*l.inputs, 1);
}
} else if(l.reverse){
reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta);
}else{
} else {
reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta);
}
}

View File

@ -6,7 +6,7 @@
#include "layer.h"
#include "network.h"
layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse);
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra);
void resize_reorg_layer(layer *l, int w, int h);
void forward_reorg_layer(const layer l, network_state state);
void backward_reorg_layer(const layer l, network_state state);

View File

@ -32,40 +32,24 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups)
return l;
}
void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output)
{
int b;
for(b = 0; b < batch; ++b){
int i;
int count = 0;
for(i = 0; i < hierarchy->groups; ++i){
int group_size = hierarchy->group_size[i];
softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count);
count += group_size;
}
}
}
void forward_softmax_layer(const softmax_layer l, network_state state)
{
int b;
int inputs = l.inputs / l.groups;
int batch = l.batch * l.groups;
if(l.softmax_tree){
softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output);
} else {
for(b = 0; b < batch; ++b){
softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs);
int i;
int count = 0;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_cpu(state.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count);
count += group_size;
}
} else {
softmax_cpu(state.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output);
}
}
void backward_softmax_layer(const softmax_layer l, network_state state)
{
int i;
for(i = 0; i < l.inputs*l.batch; ++i){
state.delta[i] += l.delta[i];
}
axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, state.delta, 1);
}
#ifdef GPU
@ -77,18 +61,16 @@ void pull_softmax_layer_output(const softmax_layer layer)
void forward_softmax_layer_gpu(const softmax_layer l, network_state state)
{
int inputs = l.inputs / l.groups;
int batch = l.batch * l.groups;
if(l.softmax_tree){
int i;
int count = 0;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_gpu(state.input+count, group_size, inputs, batch, l.temperature, l.output_gpu + count);
softmax_gpu(state.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count);
count += group_size;
}
} else {
softmax_gpu(state.input, inputs, inputs, batch, l.temperature, l.output_gpu);
softmax_gpu(state.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
}
}

View File

@ -7,7 +7,7 @@
#include "opencv2/highgui/highgui_c.h"
#endif
void train_super(char *cfgfile, char *weightfile)
void train_super(char *cfgfile, char *weightfile, int clear)
{
char *train_images = "/data/imagenet/imagenet1k.train.list";
char *backup_directory = "/home/pjreddie/backup/";
@ -19,6 +19,7 @@ void train_super(char *cfgfile, char *weightfile)
if(weightfile){
load_weights(&net, weightfile);
}
if(clear) *net.seen = 0;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = net.batch*net.subdivisions;
int i = *net.seen/imgs;
@ -123,7 +124,8 @@ void run_super(int argc, char **argv)
char *cfg = argv[3];
char *weights = (argc > 4) ? argv[4] : 0;
char *filename = (argc > 5) ? argv[5] : 0;
if(0==strcmp(argv[2], "train")) train_super(cfg, weights);
int clear = find_arg(argc, argv, "-clear");
if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear);
else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename);
/*
else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights);

View File

@ -24,33 +24,33 @@ void change_leaves(tree *t, char *leaf_list)
fprintf(stderr, "Found %d leaves.\n", found);
}
float get_hierarchy_probability(float *x, tree *hier, int c)
float get_hierarchy_probability(float *x, tree *hier, int c, int stride)
{
float p = 1;
while(c >= 0){
p = p * x[c];
p = p * x[c*stride];
c = hier->parent[c];
}
return p;
}
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves)
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride)
{
int j;
for(j = 0; j < n; ++j){
int parent = hier->parent[j];
if(parent >= 0){
predictions[j] *= predictions[parent];
predictions[j*stride] *= predictions[parent*stride];
}
}
if(only_leaves){
for(j = 0; j < n; ++j){
if(!hier->leaf[j]) predictions[j] = 0;
if(!hier->leaf[j]) predictions[j*stride] = 0;
}
}
}
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh)
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride)
{
float p = 1;
int group = 0;
@ -61,7 +61,7 @@ int hierarchy_top_prediction(float *predictions, tree *hier, float thresh)
for(i = 0; i < hier->group_size[group]; ++i){
int index = i + hier->group_offset[group];
float val = predictions[i + hier->group_offset[group]];
float val = predictions[(i + hier->group_offset[group])*stride];
if(val > max){
max_i = index;
max = val;

View File

@ -15,9 +15,9 @@ typedef struct{
} tree;
tree *read_tree(char *filename);
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves);
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride);
void change_leaves(tree *t, char *leaf_list);
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh);
float get_hierarchy_probability(float *x, tree *hier, int c);
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride);
float get_hierarchy_probability(float *x, tree *hier, int c, int stride);
#endif