mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
:eyeofthetiger::noseofthetiger::eyeofthetiger2:
This commit is contained in:
parent
b61bcf544e
commit
60e952ba69
6
Makefile
6
Makefile
@ -10,7 +10,7 @@ ARCH= -gencode arch=compute_20,code=[sm_20,sm_21] \
|
||||
-gencode arch=compute_52,code=[sm_52,compute_52]
|
||||
|
||||
# This is what I use, uncomment if you know your arch and want to specify
|
||||
# ARCH= -gencode arch=compute_52,code=compute_52
|
||||
ARCH= -gencode arch=compute_52,code=compute_52
|
||||
|
||||
VPATH=./src/
|
||||
EXEC=darknet
|
||||
@ -48,10 +48,10 @@ CFLAGS+= -DCUDNN
|
||||
LDFLAGS+= -lcudnn
|
||||
endif
|
||||
|
||||
OBJ=gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o tree.o
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o regressor.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o lsd.o super.o voxel.o tree.o
|
||||
ifeq ($(GPU), 1)
|
||||
LDFLAGS+= -lstdc++
|
||||
OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
|
||||
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
|
||||
endif
|
||||
|
||||
OBJS = $(addprefix $(OBJDIR), $(OBJ))
|
||||
|
@ -1,7 +1,7 @@
|
||||
classes= 80
|
||||
train = /home/pjreddie/data/coco/trainvalno5k.txt
|
||||
#valid = coco_testdev
|
||||
valid = data/coco_val_5k.list
|
||||
valid = coco_testdev
|
||||
#valid = data/coco_val_5k.list
|
||||
names = data/coco.names
|
||||
backup = /home/pjreddie/backup/
|
||||
eval=coco
|
||||
|
135
cfg/go.cfg
Normal file
135
cfg/go.cfg
Normal file
@ -0,0 +1,135 @@
|
||||
[net]
|
||||
batch=512
|
||||
subdivisions=1
|
||||
height=19
|
||||
width=19
|
||||
channels=1
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
|
||||
burn_in=1000
|
||||
learning_rate=0.1
|
||||
policy=poly
|
||||
power=4
|
||||
max_batches=10000000
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=linear
|
||||
|
||||
[reorg]
|
||||
extra=1
|
||||
stride=1
|
||||
|
||||
[softmax]
|
||||
|
||||
[cost]
|
||||
type=sse
|
||||
|
@ -7,13 +7,13 @@ channels=1
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
|
||||
learning_rate=0.1
|
||||
learning_rate=0.01
|
||||
policy=poly
|
||||
power=4
|
||||
max_batches=400000
|
||||
max_batches=100000
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -21,7 +21,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -29,7 +29,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -37,7 +37,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -45,7 +45,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -53,7 +53,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -61,7 +61,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -69,7 +69,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -77,7 +77,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -85,7 +85,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -93,7 +93,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -101,7 +101,7 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
@ -109,14 +109,13 @@ activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
|
||||
[convolutional]
|
||||
filters=1
|
||||
size=1
|
||||
@ -124,6 +123,10 @@ stride=1
|
||||
pad=1
|
||||
activation=linear
|
||||
|
||||
[reorg]
|
||||
extra=1
|
||||
stride=1
|
||||
|
||||
[softmax]
|
||||
|
||||
[cost]
|
||||
|
@ -12,7 +12,7 @@ exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
max_batches = 40100
|
||||
max_batches = 40200
|
||||
policy=steps
|
||||
steps=-1,100,20000,30000
|
||||
scales=.1,10,.1,.1
|
||||
|
244
cfg/yolo-voc.2.0.cfg
Normal file
244
cfg/yolo-voc.2.0.cfg
Normal file
@ -0,0 +1,244 @@
|
||||
[net]
|
||||
batch=64
|
||||
subdivisions=8
|
||||
height=416
|
||||
width=416
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.0001
|
||||
max_batches = 45000
|
||||
policy=steps
|
||||
steps=100,25000,35000
|
||||
scales=10,.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-3
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=125
|
||||
activation=linear
|
||||
|
||||
[region]
|
||||
anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
|
||||
bias_match=1
|
||||
classes=20
|
||||
coords=4
|
||||
num=5
|
||||
softmax=1
|
||||
jitter=.2
|
||||
rescore=1
|
||||
|
||||
object_scale=5
|
||||
noobject_scale=1
|
||||
class_scale=1
|
||||
coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=0
|
@ -11,11 +11,12 @@ saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.0001
|
||||
max_batches = 45000
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 80200
|
||||
policy=steps
|
||||
steps=100,25000,35000
|
||||
scales=10,.1,.1
|
||||
steps=40000,60000
|
||||
scales=.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
@ -203,11 +204,19 @@ activation=leaky
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=64
|
||||
activation=leaky
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-3
|
||||
layers=-1,-4
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
@ -224,14 +233,15 @@ pad=1
|
||||
filters=125
|
||||
activation=linear
|
||||
|
||||
|
||||
[region]
|
||||
anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
|
||||
anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
|
||||
bias_match=1
|
||||
classes=20
|
||||
coords=4
|
||||
num=5
|
||||
softmax=1
|
||||
jitter=.2
|
||||
jitter=.3
|
||||
rescore=1
|
||||
|
||||
object_scale=5
|
||||
@ -241,4 +251,4 @@ coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=0
|
||||
random=1
|
||||
|
244
cfg/yolo.2.0.cfg
Normal file
244
cfg/yolo.2.0.cfg
Normal file
@ -0,0 +1,244 @@
|
||||
[net]
|
||||
batch=1
|
||||
subdivisions=1
|
||||
width=416
|
||||
height=416
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
max_batches = 120000
|
||||
policy=steps
|
||||
steps=-1,100,80000,100000
|
||||
scales=.1,10,.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-3
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=425
|
||||
activation=linear
|
||||
|
||||
[region]
|
||||
anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741
|
||||
bias_match=1
|
||||
classes=80
|
||||
coords=4
|
||||
num=5
|
||||
softmax=1
|
||||
jitter=.2
|
||||
rescore=1
|
||||
|
||||
object_scale=5
|
||||
noobject_scale=1
|
||||
class_scale=1
|
||||
coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=0
|
32
cfg/yolo.cfg
32
cfg/yolo.cfg
@ -1,8 +1,8 @@
|
||||
[net]
|
||||
batch=1
|
||||
subdivisions=1
|
||||
width=416
|
||||
height=416
|
||||
batch=64
|
||||
subdivisions=8
|
||||
height=608
|
||||
width=608
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
@ -12,10 +12,11 @@ exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
max_batches = 120000
|
||||
burn_in=1000
|
||||
max_batches = 500200
|
||||
policy=steps
|
||||
steps=-1,100,80000,100000
|
||||
scales=.1,10,.1,.1
|
||||
steps=400000,450000
|
||||
scales=.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
@ -203,11 +204,19 @@ activation=leaky
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=64
|
||||
activation=leaky
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-3
|
||||
layers=-1,-4
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
@ -224,14 +233,15 @@ pad=1
|
||||
filters=425
|
||||
activation=linear
|
||||
|
||||
|
||||
[region]
|
||||
anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741
|
||||
anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
|
||||
bias_match=1
|
||||
classes=80
|
||||
coords=4
|
||||
num=5
|
||||
softmax=1
|
||||
jitter=.2
|
||||
jitter=.3
|
||||
rescore=1
|
||||
|
||||
object_scale=5
|
||||
@ -241,4 +251,4 @@ coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=0
|
||||
random=1
|
||||
|
@ -10,10 +10,10 @@ classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat"
|
||||
|
||||
|
||||
def convert(size, box):
|
||||
dw = 1./size[0]
|
||||
dh = 1./size[1]
|
||||
x = (box[0] + box[1])/2.0
|
||||
y = (box[2] + box[3])/2.0
|
||||
dw = 1./(size[0])
|
||||
dh = 1./(size[1])
|
||||
x = (box[0] + box[1])/2.0 - 1
|
||||
y = (box[2] + box[3])/2.0 - 1
|
||||
w = box[1] - box[0]
|
||||
h = box[3] - box[2]
|
||||
x = x*dw
|
||||
@ -34,7 +34,7 @@ def convert_annotation(year, image_id):
|
||||
for obj in root.iter('object'):
|
||||
difficult = obj.find('difficult').text
|
||||
cls = obj.find('name').text
|
||||
if cls not in classes or int(difficult) == 1:
|
||||
if cls not in classes or int(difficult)==1:
|
||||
continue
|
||||
cls_id = classes.index(cls)
|
||||
xmlbox = obj.find('bndbox')
|
||||
@ -54,3 +54,6 @@ for year, image_set in sets:
|
||||
convert_annotation(year, image_id)
|
||||
list_file.close()
|
||||
|
||||
os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
|
||||
os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include "convolutional_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "blas.h"
|
||||
#include <stdio.h>
|
||||
@ -5,55 +6,67 @@
|
||||
layer make_batchnorm_layer(int batch, int w, int h, int c)
|
||||
{
|
||||
fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
|
||||
layer layer = {0};
|
||||
layer.type = BATCHNORM;
|
||||
layer.batch = batch;
|
||||
layer.h = layer.out_h = h;
|
||||
layer.w = layer.out_w = w;
|
||||
layer.c = layer.out_c = c;
|
||||
layer.output = calloc(h * w * c * batch, sizeof(float));
|
||||
layer.delta = calloc(h * w * c * batch, sizeof(float));
|
||||
layer.inputs = w*h*c;
|
||||
layer.outputs = layer.inputs;
|
||||
layer l = {0};
|
||||
l.type = BATCHNORM;
|
||||
l.batch = batch;
|
||||
l.h = l.out_h = h;
|
||||
l.w = l.out_w = w;
|
||||
l.c = l.out_c = c;
|
||||
l.output = calloc(h * w * c * batch, sizeof(float));
|
||||
l.delta = calloc(h * w * c * batch, sizeof(float));
|
||||
l.inputs = w*h*c;
|
||||
l.outputs = l.inputs;
|
||||
|
||||
layer.scales = calloc(c, sizeof(float));
|
||||
layer.scale_updates = calloc(c, sizeof(float));
|
||||
l.scales = calloc(c, sizeof(float));
|
||||
l.scale_updates = calloc(c, sizeof(float));
|
||||
l.biases = calloc(c, sizeof(float));
|
||||
l.bias_updates = calloc(c, sizeof(float));
|
||||
int i;
|
||||
for(i = 0; i < c; ++i){
|
||||
layer.scales[i] = 1;
|
||||
l.scales[i] = 1;
|
||||
}
|
||||
|
||||
layer.mean = calloc(c, sizeof(float));
|
||||
layer.variance = calloc(c, sizeof(float));
|
||||
l.mean = calloc(c, sizeof(float));
|
||||
l.variance = calloc(c, sizeof(float));
|
||||
|
||||
layer.rolling_mean = calloc(c, sizeof(float));
|
||||
layer.rolling_variance = calloc(c, sizeof(float));
|
||||
l.rolling_mean = calloc(c, sizeof(float));
|
||||
l.rolling_variance = calloc(c, sizeof(float));
|
||||
|
||||
layer.forward = forward_batchnorm_layer;
|
||||
layer.backward = backward_batchnorm_layer;
|
||||
l.forward = forward_batchnorm_layer;
|
||||
l.backward = backward_batchnorm_layer;
|
||||
#ifdef GPU
|
||||
layer.forward_gpu = forward_batchnorm_layer_gpu;
|
||||
layer.backward_gpu = backward_batchnorm_layer_gpu;
|
||||
l.forward_gpu = forward_batchnorm_layer_gpu;
|
||||
l.backward_gpu = backward_batchnorm_layer_gpu;
|
||||
|
||||
layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
|
||||
layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
|
||||
l.output_gpu = cuda_make_array(l.output, h * w * c * batch);
|
||||
l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch);
|
||||
|
||||
layer.scales_gpu = cuda_make_array(layer.scales, c);
|
||||
layer.scale_updates_gpu = cuda_make_array(layer.scale_updates, c);
|
||||
l.biases_gpu = cuda_make_array(l.biases, c);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, c);
|
||||
|
||||
layer.mean_gpu = cuda_make_array(layer.mean, c);
|
||||
layer.variance_gpu = cuda_make_array(layer.variance, c);
|
||||
l.scales_gpu = cuda_make_array(l.scales, c);
|
||||
l.scale_updates_gpu = cuda_make_array(l.scale_updates, c);
|
||||
|
||||
layer.rolling_mean_gpu = cuda_make_array(layer.mean, c);
|
||||
layer.rolling_variance_gpu = cuda_make_array(layer.variance, c);
|
||||
l.mean_gpu = cuda_make_array(l.mean, c);
|
||||
l.variance_gpu = cuda_make_array(l.variance, c);
|
||||
|
||||
layer.mean_delta_gpu = cuda_make_array(layer.mean, c);
|
||||
layer.variance_delta_gpu = cuda_make_array(layer.variance, c);
|
||||
l.rolling_mean_gpu = cuda_make_array(l.mean, c);
|
||||
l.rolling_variance_gpu = cuda_make_array(l.variance, c);
|
||||
|
||||
layer.x_gpu = cuda_make_array(layer.output, layer.batch*layer.outputs);
|
||||
layer.x_norm_gpu = cuda_make_array(layer.output, layer.batch*layer.outputs);
|
||||
l.mean_delta_gpu = cuda_make_array(l.mean, c);
|
||||
l.variance_delta_gpu = cuda_make_array(l.variance, c);
|
||||
|
||||
l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs);
|
||||
l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs);
|
||||
#ifdef CUDNN
|
||||
cudnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||
cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
return layer;
|
||||
return l;
|
||||
}
|
||||
|
||||
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
|
||||
@ -108,7 +121,7 @@ void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_del
|
||||
for(f = 0; f < filters; ++f){
|
||||
for(k = 0; k < spatial; ++k){
|
||||
int index = j*filters*spatial + f*spatial + k;
|
||||
delta[index] = delta[index] * 1./(sqrt(variance[f]) + .00001f) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
|
||||
delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -130,10 +143,10 @@ void forward_batchnorm_layer(layer l, network_state state)
|
||||
mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
|
||||
variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);
|
||||
|
||||
scal_cpu(l.out_c, .9, l.rolling_mean, 1);
|
||||
axpy_cpu(l.out_c, .1, l.mean, 1, l.rolling_mean, 1);
|
||||
scal_cpu(l.out_c, .9, l.rolling_variance, 1);
|
||||
axpy_cpu(l.out_c, .1, l.variance, 1, l.rolling_variance, 1);
|
||||
scal_cpu(l.out_c, .99, l.rolling_mean, 1);
|
||||
axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1);
|
||||
scal_cpu(l.out_c, .99, l.rolling_variance, 1);
|
||||
axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);
|
||||
|
||||
copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
|
||||
normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
@ -142,10 +155,12 @@ void forward_batchnorm_layer(layer l, network_state state)
|
||||
normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
}
|
||||
scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
}
|
||||
|
||||
void backward_batchnorm_layer(const layer l, network_state state)
|
||||
{
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h);
|
||||
backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates);
|
||||
|
||||
scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
@ -179,6 +194,28 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
|
||||
l.out_h = l.out_w = 1;
|
||||
}
|
||||
if (state.train) {
|
||||
#ifdef CUDNN
|
||||
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
|
||||
float one = 1;
|
||||
float zero = 0;
|
||||
cudnnBatchNormalizationForwardTraining(cudnn_handle(),
|
||||
CUDNN_BATCHNORM_SPATIAL,
|
||||
&one,
|
||||
&zero,
|
||||
l.dstTensorDesc,
|
||||
l.x_gpu,
|
||||
l.dstTensorDesc,
|
||||
l.output_gpu,
|
||||
l.normTensorDesc,
|
||||
l.scales_gpu,
|
||||
l.biases_gpu,
|
||||
.01,
|
||||
l.rolling_mean_gpu,
|
||||
l.rolling_variance_gpu,
|
||||
.00001,
|
||||
l.mean_gpu,
|
||||
l.variance_gpu);
|
||||
#else
|
||||
fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);
|
||||
fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu);
|
||||
|
||||
@ -190,15 +227,45 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
|
||||
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
|
||||
normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1);
|
||||
|
||||
scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
|
||||
#endif
|
||||
} else {
|
||||
normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
|
||||
}
|
||||
|
||||
scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
}
|
||||
|
||||
void backward_batchnorm_layer_gpu(const layer l, network_state state)
|
||||
{
|
||||
#ifdef CUDNN
|
||||
float one = 1;
|
||||
float zero = 0;
|
||||
cudnnBatchNormalizationBackward(cudnn_handle(),
|
||||
CUDNN_BATCHNORM_SPATIAL,
|
||||
&one,
|
||||
&zero,
|
||||
&one,
|
||||
&one,
|
||||
l.dstTensorDesc,
|
||||
l.x_gpu,
|
||||
l.dstTensorDesc,
|
||||
l.delta_gpu,
|
||||
l.dstTensorDesc,
|
||||
l.x_norm_gpu,
|
||||
l.normTensorDesc,
|
||||
l.scales_gpu,
|
||||
l.scale_updates_gpu,
|
||||
l.bias_updates_gpu,
|
||||
.00001,
|
||||
l.mean_gpu,
|
||||
l.variance_gpu);
|
||||
copy_ongpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1);
|
||||
#else
|
||||
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h);
|
||||
backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu);
|
||||
|
||||
scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
@ -206,6 +273,7 @@ void backward_batchnorm_layer_gpu(const layer l, network_state state)
|
||||
fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu);
|
||||
fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu);
|
||||
normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
|
||||
#endif
|
||||
if(l.type == BATCHNORM) copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
|
||||
}
|
||||
#endif
|
||||
|
33
src/blas.c
33
src/blas.c
@ -179,11 +179,21 @@ void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
}
|
||||
else {
|
||||
error[i] = 2*abs_val - 1;
|
||||
delta[i] = (diff < 0) ? -1 : 1;
|
||||
delta[i] = (diff < 0) ? 1 : -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
float diff = truth[i] - pred[i];
|
||||
error[i] = fabs(diff);
|
||||
delta[i] = diff > 0 ? 1 : -1;
|
||||
}
|
||||
}
|
||||
|
||||
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
int i;
|
||||
@ -202,21 +212,32 @@ float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
|
||||
return dot;
|
||||
}
|
||||
|
||||
void softmax(float *input, int n, float temp, float *output)
|
||||
void softmax(float *input, int n, float temp, int stride, float *output)
|
||||
{
|
||||
int i;
|
||||
float sum = 0;
|
||||
float largest = -FLT_MAX;
|
||||
for(i = 0; i < n; ++i){
|
||||
if(input[i] > largest) largest = input[i];
|
||||
if(input[i*stride] > largest) largest = input[i*stride];
|
||||
}
|
||||
for(i = 0; i < n; ++i){
|
||||
float e = exp(input[i]/temp - largest/temp);
|
||||
float e = exp(input[i*stride]/temp - largest/temp);
|
||||
sum += e;
|
||||
output[i] = e;
|
||||
output[i*stride] = e;
|
||||
}
|
||||
for(i = 0; i < n; ++i){
|
||||
output[i] /= sum;
|
||||
output[i*stride] /= sum;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
|
||||
{
|
||||
int g, b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(g = 0; g < groups; ++g){
|
||||
softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -33,9 +33,11 @@ void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_del
|
||||
|
||||
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
|
||||
|
||||
void softmax(float *input, int n, float temp, float *output);
|
||||
void softmax(float *input, int n, float temp, int stride, float *output);
|
||||
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
|
||||
|
||||
#ifdef GPU
|
||||
#include "cuda.h"
|
||||
@ -45,6 +47,7 @@ void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float
|
||||
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
|
||||
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
|
||||
void scal_ongpu(int N, float ALPHA, float * X, int INCX);
|
||||
void add_ongpu(int N, float ALPHA, float * X, int INCX);
|
||||
void supp_ongpu(int N, float ALPHA, float * X, int INCX);
|
||||
void mask_ongpu(int N, float * X, float mask_num, float * mask);
|
||||
void const_ongpu(int N, float ALPHA, float *X, int INCX);
|
||||
@ -72,13 +75,14 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
|
||||
|
||||
void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
void l2_gpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
void l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc);
|
||||
void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c);
|
||||
void mult_add_into_gpu(int num, float *a, float *b, float *c);
|
||||
|
||||
void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
|
||||
|
||||
void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output);
|
||||
void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
|
||||
void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t);
|
||||
|
||||
void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out);
|
||||
|
@ -161,7 +161,7 @@ __global__ void normalize_kernel(int N, float *x, float *mean, float *variance,
|
||||
if (index >= N) return;
|
||||
int f = (index/spatial)%filters;
|
||||
|
||||
x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
|
||||
x[index] = (x[index] - mean[f])/(sqrt(variance[f] + .00001f));
|
||||
}
|
||||
|
||||
__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
|
||||
@ -170,7 +170,7 @@ __global__ void normalize_delta_kernel(int N, float *x, float *mean, float *vari
|
||||
if (index >= N) return;
|
||||
int f = (index/spatial)%filters;
|
||||
|
||||
delta[index] = delta[index] * 1./(sqrt(variance[f]) + .000001f) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
|
||||
delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
|
||||
}
|
||||
|
||||
extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
|
||||
@ -192,7 +192,7 @@ __global__ void variance_delta_kernel(float *x, float *delta, float *mean, floa
|
||||
variance_delta[i] += delta[index]*(x[index] - mean[i]);
|
||||
}
|
||||
}
|
||||
variance_delta[i] *= -.5 * pow(variance[i] + .000001f, (float)(-3./2.));
|
||||
variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.));
|
||||
}
|
||||
|
||||
__global__ void accumulate_kernel(float *x, int n, int groups, float *sum)
|
||||
@ -224,12 +224,14 @@ __global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch,
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(id == 0){
|
||||
mean_delta[filter] = 0;
|
||||
for(i = 0; i < threads; ++i){
|
||||
mean_delta[filter] += local[i];
|
||||
}
|
||||
mean_delta[filter] *= (-1./sqrt(variance[filter] + .000001f));
|
||||
mean_delta[filter] *= (-1./sqrt(variance[filter] + .00001f));
|
||||
}
|
||||
}
|
||||
|
||||
@ -252,12 +254,14 @@ __global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean,
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(id == 0){
|
||||
variance_delta[filter] = 0;
|
||||
for(i = 0; i < threads; ++i){
|
||||
variance_delta[filter] += local[i];
|
||||
}
|
||||
variance_delta[filter] *= -.5 * pow(variance[filter] + .000001f, (float)(-3./2.));
|
||||
variance_delta[filter] *= -.5 * pow(variance[filter] + .00001f, (float)(-3./2.));
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,7 +278,7 @@ __global__ void mean_delta_kernel(float *delta, float *variance, int batch, int
|
||||
mean_delta[i] += delta[index];
|
||||
}
|
||||
}
|
||||
mean_delta[i] *= (-1./sqrt(variance[i] + .000001f));
|
||||
mean_delta[i] *= (-1./sqrt(variance[i] + .00001f));
|
||||
}
|
||||
|
||||
extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
|
||||
@ -391,6 +395,12 @@ __global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void add_kernel(int N, float ALPHA, float *X, int INCX)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(i < N) X[i*INCX] += ALPHA;
|
||||
}
|
||||
|
||||
__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
@ -447,6 +457,8 @@ __global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial,
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(id == 0){
|
||||
mean[filter] = 0;
|
||||
for(i = 0; i < threads; ++i){
|
||||
@ -475,6 +487,8 @@ __global__ void fast_variance_kernel(float *x, float *mean, int batch, int filt
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(id == 0){
|
||||
variance[filter] = 0;
|
||||
for(i = 0; i < threads; ++i){
|
||||
@ -593,6 +607,12 @@ extern "C" void constrain_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||
}
|
||||
|
||||
|
||||
extern "C" void add_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||
{
|
||||
add_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||
{
|
||||
scal_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||
@ -658,7 +678,7 @@ __global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta,
|
||||
}
|
||||
else {
|
||||
error[i] = 2*abs_val - 1;
|
||||
delta[i] = (diff < 0) ? -1 : 1;
|
||||
delta[i] = (diff > 0) ? 1 : -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -685,6 +705,23 @@ extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *er
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(i < n){
|
||||
float diff = truth[i] - pred[i];
|
||||
error[i] = abs(diff);
|
||||
delta[i] = (diff > 0) ? 1 : -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
|
||||
@ -732,36 +769,36 @@ extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c)
|
||||
}
|
||||
|
||||
|
||||
__device__ void softmax_device(int n, float *input, float temp, float *output)
|
||||
__device__ void softmax_device(float *input, int n, float temp, int stride, float *output)
|
||||
{
|
||||
int i;
|
||||
float sum = 0;
|
||||
float largest = -INFINITY;
|
||||
for(i = 0; i < n; ++i){
|
||||
int val = input[i];
|
||||
int val = input[i*stride];
|
||||
largest = (val>largest) ? val : largest;
|
||||
}
|
||||
for(i = 0; i < n; ++i){
|
||||
float e = exp(input[i]/temp - largest/temp);
|
||||
float e = exp(input[i*stride]/temp - largest/temp);
|
||||
sum += e;
|
||||
output[i] = e;
|
||||
output[i*stride] = e;
|
||||
}
|
||||
for(i = 0; i < n; ++i){
|
||||
output[i] /= sum;
|
||||
output[i*stride] /= sum;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void softmax_kernel(int n, int offset, int batch, float *input, float temp, float *output)
|
||||
__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
|
||||
{
|
||||
int b = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(b >= batch) return;
|
||||
softmax_device(n, input + b*offset, temp, output + b*offset);
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (id >= batch*groups) return;
|
||||
int b = id / groups;
|
||||
int g = id % groups;
|
||||
softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
|
||||
}
|
||||
|
||||
extern "C" void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output)
|
||||
extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
|
||||
{
|
||||
int inputs = n;
|
||||
int batch = groups;
|
||||
softmax_kernel<<<cuda_gridsize(batch), BLOCK>>>(inputs, offset, batch, input, temp, output);
|
||||
softmax_kernel<<<cuda_gridsize(batch*groups), BLOCK>>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
@ -3,13 +3,13 @@
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
box float_to_box(float *f)
|
||||
box float_to_box(float *f, int stride)
|
||||
{
|
||||
box b;
|
||||
b.x = f[0];
|
||||
b.y = f[1];
|
||||
b.w = f[2];
|
||||
b.h = f[3];
|
||||
b.y = f[1*stride];
|
||||
b.w = f[2*stride];
|
||||
b.h = f[3*stride];
|
||||
return b;
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,7 @@ typedef struct{
|
||||
float dx, dy, dw, dh;
|
||||
} dbox;
|
||||
|
||||
box float_to_box(float *f);
|
||||
box float_to_box(float *f, int stride);
|
||||
float box_iou(box a, box b);
|
||||
float box_rmse(box a, box b);
|
||||
dbox diou(box a, box b);
|
||||
|
@ -379,7 +379,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
|
||||
float *pred = calloc(classes, sizeof(float));
|
||||
for(j = 0; j < 10; ++j){
|
||||
float *p = network_predict(net, images[j].data);
|
||||
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
|
||||
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1, 1);
|
||||
axpy_cpu(classes, 1, p, 1, pred, 1);
|
||||
free_image(images[j]);
|
||||
}
|
||||
@ -440,7 +440,7 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
|
||||
//show_image(crop, "cropped");
|
||||
//cvWaitKey(0);
|
||||
float *pred = network_predict(net, resized.data);
|
||||
if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1);
|
||||
if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1, 1);
|
||||
|
||||
free_image(im);
|
||||
free_image(resized);
|
||||
@ -502,7 +502,7 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
|
||||
//show_image(crop, "cropped");
|
||||
//cvWaitKey(0);
|
||||
float *pred = network_predict(net, crop.data);
|
||||
if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1);
|
||||
if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1, 1);
|
||||
|
||||
if(resized.data != im.data) free_image(resized);
|
||||
free_image(im);
|
||||
@ -563,7 +563,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
|
||||
image r = resize_min(im, scales[j]);
|
||||
resize_network(&net, r.w, r.h);
|
||||
float *p = network_predict(net, r.data);
|
||||
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1);
|
||||
if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1 , 1);
|
||||
axpy_cpu(classes, 1, p, 1, pred, 1);
|
||||
flip_image(r);
|
||||
p = network_predict(net, r.data);
|
||||
@ -703,7 +703,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
|
||||
float *X = r.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 0);
|
||||
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 0, 1);
|
||||
top_k(predictions, net.outputs, top, indexes);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
for(i = 0; i < top; ++i){
|
||||
@ -1084,7 +1084,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
|
||||
show_image(in, "Classifier");
|
||||
|
||||
float *predictions = network_predict(net, in_s.data);
|
||||
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1);
|
||||
if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1, 1);
|
||||
top_predictions(net, top, indexes);
|
||||
|
||||
printf("\033[2J");
|
||||
|
@ -117,26 +117,70 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
||||
|
||||
if (l.batch_normalize) {
|
||||
forward_batchnorm_layer_gpu(l, state);
|
||||
} else {
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
}
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
|
||||
activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
|
||||
//if(l.dot > 0) dot_error_gpu(l);
|
||||
if(l.binary || l.xnor) swap_binary(&l);
|
||||
}
|
||||
|
||||
__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta)
|
||||
{
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(id >= n) return;
|
||||
|
||||
int j = id % w;
|
||||
id /= w;
|
||||
int i = id % h;
|
||||
id /= h;
|
||||
int k = id % c;
|
||||
id /= c;
|
||||
int b = id;
|
||||
|
||||
int w_offset = -(size/2.);
|
||||
int h_offset = -(size/2.);
|
||||
|
||||
int out_index = j + w*(i + h*(k + c*b));
|
||||
int l, m;
|
||||
for(l = 0; l < size; ++l){
|
||||
for(m = 0; m < size; ++m){
|
||||
int cur_h = h_offset + i + l;
|
||||
int cur_w = w_offset + j + m;
|
||||
int index = cur_w + w*(cur_h + h*(k + b*c));
|
||||
int valid = (cur_h >= 0 && cur_h < h &&
|
||||
cur_w >= 0 && cur_w < w);
|
||||
delta[out_index] += valid ? rate*(x[index] - x[out_index]) : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void smooth_layer(layer l, int size, float rate)
|
||||
{
|
||||
int h = l.out_h;
|
||||
int w = l.out_w;
|
||||
int c = l.out_c;
|
||||
|
||||
size_t n = h*w*c*l.batch;
|
||||
|
||||
smooth_kernel<<<cuda_gridsize(n), BLOCK>>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
||||
{
|
||||
if(l.smooth){
|
||||
smooth_layer(l, 5, l.smooth);
|
||||
}
|
||||
//constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
|
||||
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
|
||||
|
||||
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
|
||||
if(l.batch_normalize){
|
||||
backward_batchnorm_layer_gpu(l, state);
|
||||
//axpy_ongpu(l.outputs*l.batch, -state.net.decay, l.x_gpu, 1, l.delta_gpu, 1);
|
||||
} else {
|
||||
//axpy_ongpu(l.outputs*l.batch, -state.net.decay, l.output_gpu, 1, l.delta_gpu, 1);
|
||||
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
}
|
||||
float *original_input = state.input;
|
||||
|
||||
|
@ -23,11 +23,11 @@ void swap_binary(convolutional_layer *l)
|
||||
l->weights = l->binary_weights;
|
||||
l->binary_weights = swap;
|
||||
|
||||
#ifdef GPU
|
||||
#ifdef GPU
|
||||
swap = l->weights_gpu;
|
||||
l->weights_gpu = l->binary_weights_gpu;
|
||||
l->binary_weights_gpu = swap;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void binarize_weights(float *weights, int n, int size, float *binary)
|
||||
@ -80,23 +80,15 @@ int convolutional_out_width(convolutional_layer l)
|
||||
|
||||
image get_convolutional_image(convolutional_layer l)
|
||||
{
|
||||
int h,w,c;
|
||||
h = convolutional_out_height(l);
|
||||
w = convolutional_out_width(l);
|
||||
c = l.n;
|
||||
return float_to_image(w,h,c,l.output);
|
||||
return float_to_image(l.out_w,l.out_h,l.out_c,l.output);
|
||||
}
|
||||
|
||||
image get_convolutional_delta(convolutional_layer l)
|
||||
{
|
||||
int h,w,c;
|
||||
h = convolutional_out_height(l);
|
||||
w = convolutional_out_width(l);
|
||||
c = l.n;
|
||||
return float_to_image(w,h,c,l.delta);
|
||||
return float_to_image(l.out_w,l.out_h,l.out_c,l.delta);
|
||||
}
|
||||
|
||||
size_t get_workspace_size(layer l){
|
||||
static size_t get_workspace_size(layer l){
|
||||
#ifdef CUDNN
|
||||
if(gpu_index >= 0){
|
||||
size_t most = 0;
|
||||
@ -127,7 +119,7 @@ size_t get_workspace_size(layer l){
|
||||
if (s > most) most = s;
|
||||
return most;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
return (size_t)l.out_h*l.out_w*l.size*l.size*l.c*sizeof(float);
|
||||
}
|
||||
|
||||
@ -141,6 +133,7 @@ void cudnn_convolutional_setup(layer *l)
|
||||
|
||||
cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
|
||||
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
|
||||
cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
|
||||
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
|
||||
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
|
||||
@ -198,8 +191,8 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
||||
// float scale = 1./sqrt(size*size*c);
|
||||
float scale = sqrt(2./(size*size*c));
|
||||
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
|
||||
int out_h = convolutional_out_height(l);
|
||||
int out_w = convolutional_out_width(l);
|
||||
int out_h = convolutional_out_height(l);
|
||||
l.out_h = out_h;
|
||||
l.out_w = out_w;
|
||||
l.out_c = n;
|
||||
@ -291,6 +284,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
||||
l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
}
|
||||
#ifdef CUDNN
|
||||
cudnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.srcTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
cudnnCreateFilterDescriptor(&l.weightDesc);
|
||||
@ -426,8 +420,8 @@ void backward_bias(float *bias_updates, float *delta, int batch, int n, int size
|
||||
|
||||
void forward_convolutional_layer(convolutional_layer l, network_state state)
|
||||
{
|
||||
int out_h = convolutional_out_height(l);
|
||||
int out_w = convolutional_out_width(l);
|
||||
int out_h = l.out_h;
|
||||
int out_w = l.out_w;
|
||||
int i;
|
||||
|
||||
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
|
||||
@ -458,8 +452,9 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
|
||||
|
||||
if(l.batch_normalize){
|
||||
forward_batchnorm_layer(l, state);
|
||||
} else {
|
||||
add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
|
||||
}
|
||||
add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
|
||||
|
||||
activate_array(l.output, m*n*l.batch, l.activation);
|
||||
if(l.binary || l.xnor) swap_binary(&l);
|
||||
@ -470,14 +465,14 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
|
||||
int i;
|
||||
int m = l.n;
|
||||
int n = l.size*l.size*l.c;
|
||||
int k = convolutional_out_height(l)*
|
||||
convolutional_out_width(l);
|
||||
int k = l.out_w*l.out_h;
|
||||
|
||||
gradient_array(l.output, m*k*l.batch, l.activation, l.delta);
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
|
||||
|
||||
if(l.batch_normalize){
|
||||
backward_batchnorm_layer(l, state);
|
||||
} else {
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
|
||||
}
|
||||
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
|
@ -12,6 +12,7 @@ COST_TYPE get_cost_type(char *s)
|
||||
if (strcmp(s, "sse")==0) return SSE;
|
||||
if (strcmp(s, "masked")==0) return MASKED;
|
||||
if (strcmp(s, "smooth")==0) return SMOOTH;
|
||||
if (strcmp(s, "L1")==0) return L1;
|
||||
fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s);
|
||||
return SSE;
|
||||
}
|
||||
@ -25,6 +26,8 @@ char *get_cost_string(COST_TYPE a)
|
||||
return "masked";
|
||||
case SMOOTH:
|
||||
return "smooth";
|
||||
case L1:
|
||||
return "L1";
|
||||
}
|
||||
return "sse";
|
||||
}
|
||||
@ -81,6 +84,8 @@ void forward_cost_layer(cost_layer l, network_state state)
|
||||
}
|
||||
if(l.cost_type == SMOOTH){
|
||||
smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
|
||||
}else if(l.cost_type == L1){
|
||||
l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
|
||||
} else {
|
||||
l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
|
||||
}
|
||||
@ -116,12 +121,18 @@ int float_abs_compare (const void * a, const void * b)
|
||||
void forward_cost_layer_gpu(cost_layer l, network_state state)
|
||||
{
|
||||
if (!state.truth) return;
|
||||
if(l.smooth){
|
||||
scal_ongpu(l.batch*l.inputs, (1-l.smooth), state.truth, 1);
|
||||
add_ongpu(l.batch*l.inputs, l.smooth * 1./l.inputs, state.truth, 1);
|
||||
}
|
||||
if (l.cost_type == MASKED) {
|
||||
mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth);
|
||||
}
|
||||
|
||||
if(l.cost_type == SMOOTH){
|
||||
smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
|
||||
} else if (l.cost_type == L1){
|
||||
l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
|
||||
} else {
|
||||
l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
|
||||
}
|
||||
@ -136,6 +147,10 @@ void forward_cost_layer_gpu(cost_layer l, network_state state)
|
||||
supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
|
||||
}
|
||||
|
||||
if(l.thresh){
|
||||
supp_ongpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1);
|
||||
}
|
||||
|
||||
cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
|
||||
l.cost[0] = sum_array(l.output, l.batch*l.inputs);
|
||||
}
|
||||
|
@ -157,4 +157,13 @@ void cuda_pull_array(float *x_gpu, float *x, size_t n)
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
float cuda_mag_array(float *x_gpu, size_t n)
|
||||
{
|
||||
float *temp = calloc(n, sizeof(float));
|
||||
cuda_pull_array(x_gpu, temp, n);
|
||||
float m = mag_array(temp, n);
|
||||
free(temp);
|
||||
return m;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -26,6 +26,7 @@ void cuda_free(float *x_gpu);
|
||||
void cuda_random(float *x_gpu, size_t n);
|
||||
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
|
||||
dim3 cuda_gridsize(size_t n);
|
||||
float cuda_mag_array(float *x_gpu, size_t n);
|
||||
|
||||
#ifdef CUDNN
|
||||
cudnnHandle_t cudnn_handle();
|
||||
|
@ -24,6 +24,7 @@ extern void run_nightmare(int argc, char **argv);
|
||||
extern void run_dice(int argc, char **argv);
|
||||
extern void run_compare(int argc, char **argv);
|
||||
extern void run_classifier(int argc, char **argv);
|
||||
extern void run_regressor(int argc, char **argv);
|
||||
extern void run_char_rnn(int argc, char **argv);
|
||||
extern void run_vid_rnn(int argc, char **argv);
|
||||
extern void run_tag(int argc, char **argv);
|
||||
@ -31,6 +32,7 @@ extern void run_cifar(int argc, char **argv);
|
||||
extern void run_go(int argc, char **argv);
|
||||
extern void run_art(int argc, char **argv);
|
||||
extern void run_super(int argc, char **argv);
|
||||
extern void run_lsd(int argc, char **argv);
|
||||
|
||||
void average(int argc, char *argv[])
|
||||
{
|
||||
@ -95,7 +97,7 @@ void speed(char *cfgfile, int tics)
|
||||
set_batch_network(&net, 1);
|
||||
int i;
|
||||
time_t start = time(0);
|
||||
image im = make_image(net.w, net.h, net.c);
|
||||
image im = make_image(net.w, net.h, net.c*net.batch);
|
||||
for(i = 0; i < tics; ++i){
|
||||
network_predict(net, im.data);
|
||||
}
|
||||
@ -150,12 +152,24 @@ void oneoff(char *cfgfile, char *weightfile, char *outfile)
|
||||
save_weights(net, outfile);
|
||||
}
|
||||
|
||||
void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l)
|
||||
{
|
||||
gpu_index = -1;
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights_upto(&net, weightfile, 0, net.n);
|
||||
load_weights_upto(&net, weightfile, l, net.n);
|
||||
}
|
||||
*net.seen = 0;
|
||||
save_weights_upto(net, outfile, net.n);
|
||||
}
|
||||
|
||||
void partial(char *cfgfile, char *weightfile, char *outfile, int max)
|
||||
{
|
||||
gpu_index = -1;
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights_upto(&net, weightfile, max);
|
||||
load_weights_upto(&net, weightfile, 0, max);
|
||||
}
|
||||
*net.seen = 0;
|
||||
save_weights_upto(net, outfile, max);
|
||||
@ -380,6 +394,8 @@ int main(int argc, char **argv)
|
||||
run_voxel(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "super")){
|
||||
run_super(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "lsd")){
|
||||
run_lsd(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "detector")){
|
||||
run_detector(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "detect")){
|
||||
@ -400,6 +416,8 @@ int main(int argc, char **argv)
|
||||
predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
|
||||
} else if (0 == strcmp(argv[1], "classifier")){
|
||||
run_classifier(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "regressor")){
|
||||
run_regressor(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "art")){
|
||||
run_art(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "tag")){
|
||||
@ -436,6 +454,8 @@ int main(int argc, char **argv)
|
||||
speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
|
||||
} else if (0 == strcmp(argv[1], "oneoff")){
|
||||
oneoff(argv[2], argv[3], argv[4]);
|
||||
} else if (0 == strcmp(argv[1], "oneoff2")){
|
||||
oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
|
||||
} else if (0 == strcmp(argv[1], "partial")){
|
||||
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
|
||||
} else if (0 == strcmp(argv[1], "average")){
|
||||
|
100
src/data.c
100
src/data.c
@ -317,7 +317,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
|
||||
h = boxes[i].h;
|
||||
id = boxes[i].id;
|
||||
|
||||
if ((w < .005 || h < .005)) continue;
|
||||
if ((w < .001 || h < .001)) continue;
|
||||
|
||||
truth[i*5+0] = x;
|
||||
truth[i*5+1] = y;
|
||||
@ -393,7 +393,7 @@ void fill_truth(char *path, char **labels, int k, float *truth)
|
||||
++count;
|
||||
}
|
||||
}
|
||||
if(count != 1) printf("Too many or too few labels: %d, %s\n", count, path);
|
||||
if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path);
|
||||
}
|
||||
|
||||
void fill_hierarchy(float *truth, int k, tree *hierarchy)
|
||||
@ -428,6 +428,24 @@ void fill_hierarchy(float *truth, int k, tree *hierarchy)
|
||||
}
|
||||
}
|
||||
|
||||
matrix load_regression_labels_paths(char **paths, int n)
|
||||
{
|
||||
matrix y = make_matrix(n, 1);
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
char labelpath[4096];
|
||||
find_replace(paths[i], "images", "targets", labelpath);
|
||||
find_replace(labelpath, "JPEGImages", "targets", labelpath);
|
||||
find_replace(labelpath, ".jpg", ".txt", labelpath);
|
||||
find_replace(labelpath, ".png", ".txt", labelpath);
|
||||
|
||||
FILE *file = fopen(labelpath, "r");
|
||||
fscanf(file, "%f", &(y.vals[i][0]));
|
||||
fclose(file);
|
||||
}
|
||||
return y;
|
||||
}
|
||||
|
||||
matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy)
|
||||
{
|
||||
matrix y = make_matrix(n, k);
|
||||
@ -673,45 +691,44 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
|
||||
d.y = make_matrix(n, 5*boxes);
|
||||
for(i = 0; i < n; ++i){
|
||||
image orig = load_image_color(random_paths[i], 0, 0);
|
||||
image sized = make_image(w, h, orig.c);
|
||||
fill_image(sized, .5);
|
||||
|
||||
float dw = jitter * orig.w;
|
||||
float dh = jitter * orig.h;
|
||||
|
||||
int oh = orig.h;
|
||||
int ow = orig.w;
|
||||
float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh));
|
||||
float scale = rand_uniform(.25, 2);
|
||||
|
||||
int dw = (ow*jitter);
|
||||
int dh = (oh*jitter);
|
||||
float nw, nh;
|
||||
|
||||
if(new_ar < 1){
|
||||
nh = scale * h;
|
||||
nw = nh * new_ar;
|
||||
} else {
|
||||
nw = scale * w;
|
||||
nh = nw / new_ar;
|
||||
}
|
||||
|
||||
int pleft = rand_uniform(-dw, dw);
|
||||
int pright = rand_uniform(-dw, dw);
|
||||
int ptop = rand_uniform(-dh, dh);
|
||||
int pbot = rand_uniform(-dh, dh);
|
||||
float dx = rand_uniform(0, w - nw);
|
||||
float dy = rand_uniform(0, h - nh);
|
||||
|
||||
int swidth = ow - pleft - pright;
|
||||
int sheight = oh - ptop - pbot;
|
||||
place_image(orig, nw, nh, dx, dy, sized);
|
||||
|
||||
float sx = (float)swidth / ow;
|
||||
float sy = (float)sheight / oh;
|
||||
|
||||
int flip = rand()%2;
|
||||
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
|
||||
|
||||
float dx = ((float)pleft/ow)/sx;
|
||||
float dy = ((float)ptop /oh)/sy;
|
||||
|
||||
image sized = resize_image(cropped, w, h);
|
||||
if(flip) flip_image(sized);
|
||||
random_distort_image(sized, hue, saturation, exposure);
|
||||
int flip = rand()%2;
|
||||
if(flip) flip_image(sized);
|
||||
d.X.vals[i] = sized.data;
|
||||
|
||||
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
|
||||
|
||||
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h);
|
||||
|
||||
free_image(orig);
|
||||
free_image(cropped);
|
||||
}
|
||||
free(random_paths);
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
void *load_thread(void *ptr)
|
||||
{
|
||||
//printf("Loading data: %d\n", rand());
|
||||
@ -722,6 +739,8 @@ void *load_thread(void *ptr)
|
||||
|
||||
if (a.type == OLD_CLASSIFICATION_DATA){
|
||||
*a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||
} else if (a.type == REGRESSION_DATA){
|
||||
*a.d = load_data_regression(a.paths, a.n, a.m, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == CLASSIFICATION_DATA){
|
||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == SUPER_DATA){
|
||||
@ -739,6 +758,9 @@ void *load_thread(void *ptr)
|
||||
} else if (a.type == IMAGE_DATA){
|
||||
*(a.im) = load_image_color(a.path, 0, 0);
|
||||
*(a.resized) = resize_image(*(a.im), a.w, a.h);
|
||||
} else if (a.type == LETTERBOX_DATA){
|
||||
*(a.im) = load_image_color(a.path, 0, 0);
|
||||
*(a.resized) = letterbox_image(*(a.im), a.w, a.h);
|
||||
} else if (a.type == TAG_DATA){
|
||||
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
|
||||
}
|
||||
@ -863,6 +885,17 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_regression(char **paths, int n, int m, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
|
||||
{
|
||||
if(m) paths = get_random_paths(paths, n, m);
|
||||
data d = {0};
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
|
||||
d.y = load_regression_labels_paths(paths, n);
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
|
||||
{
|
||||
if(m) paths = get_random_paths(paths, n, m);
|
||||
@ -962,7 +995,6 @@ data load_cifar10_data(char *filename)
|
||||
X.vals[i][j] = (double)bytes[j+1];
|
||||
}
|
||||
}
|
||||
//translate_data_rows(d, -128);
|
||||
scale_data_rows(d, 1./255);
|
||||
//normalize_data_rows(d);
|
||||
fclose(fp);
|
||||
@ -1029,7 +1061,6 @@ data load_all_cifar10()
|
||||
fclose(fp);
|
||||
}
|
||||
//normalize_data_rows(d);
|
||||
//translate_data_rows(d, -128);
|
||||
scale_data_rows(d, 1./255);
|
||||
smooth_data(d);
|
||||
return d;
|
||||
@ -1113,6 +1144,19 @@ void translate_data_rows(data d, float s)
|
||||
}
|
||||
}
|
||||
|
||||
data copy_data(data d)
|
||||
{
|
||||
data c = {0};
|
||||
c.w = d.w;
|
||||
c.h = d.h;
|
||||
c.shallow = 0;
|
||||
c.num_boxes = d.num_boxes;
|
||||
c.boxes = d.boxes;
|
||||
c.X = copy_matrix(d.X);
|
||||
c.y = copy_matrix(d.y);
|
||||
return c;
|
||||
}
|
||||
|
||||
void normalize_data_rows(data d)
|
||||
{
|
||||
int i;
|
||||
|
@ -28,7 +28,7 @@ typedef struct{
|
||||
} data;
|
||||
|
||||
typedef enum {
|
||||
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA
|
||||
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA
|
||||
} data_type;
|
||||
|
||||
typedef struct load_args{
|
||||
@ -83,6 +83,7 @@ data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||
data load_data_regression(char **paths, int n, int m, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||
data load_go(char *filename);
|
||||
|
||||
box_label *read_boxes(char *filename, int *n);
|
||||
@ -106,5 +107,6 @@ data *split_data(data d, int part, int total);
|
||||
data concat_data(data d1, data d2);
|
||||
data concat_datas(data *d, int n);
|
||||
void fill_truth(char *path, char **labels, int k, float *truth);
|
||||
data copy_data(data d);
|
||||
|
||||
#endif
|
||||
|
@ -5,6 +5,7 @@
|
||||
extern "C" {
|
||||
#include "convolutional_layer.h"
|
||||
#include "deconvolutional_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "gemm.h"
|
||||
#include "blas.h"
|
||||
#include "im2col.h"
|
||||
@ -13,97 +14,119 @@ extern "C" {
|
||||
#include "cuda.h"
|
||||
}
|
||||
|
||||
extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
||||
extern "C" void forward_deconvolutional_layer_gpu(layer l, network_state state)
|
||||
{
|
||||
int i;
|
||||
int out_h = deconvolutional_out_height(layer);
|
||||
int out_w = deconvolutional_out_width(layer);
|
||||
int out_h = l.out_h;
|
||||
int out_w = l.out_w;
|
||||
int size = out_h*out_w;
|
||||
|
||||
int m = layer.size*layer.size*layer.n;
|
||||
int n = layer.h*layer.w;
|
||||
int k = layer.c;
|
||||
int m = l.size*l.size*l.n;
|
||||
int n = l.h*l.w;
|
||||
int k = l.c;
|
||||
|
||||
fill_ongpu(layer.outputs*layer.batch, 0, layer.output_gpu, 1);
|
||||
fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1);
|
||||
|
||||
for(i = 0; i < layer.batch; ++i){
|
||||
float *a = layer.weights_gpu;
|
||||
float *b = state.input + i*layer.c*layer.h*layer.w;
|
||||
float *c = layer.col_image_gpu;
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
float *a = l.weights_gpu;
|
||||
float *b = state.input + i*l.c*l.h*l.w;
|
||||
float *c = state.workspace;
|
||||
|
||||
gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
|
||||
|
||||
col2im_ongpu(c, layer.n, out_h, out_w, layer.size, layer.stride, 0, layer.output_gpu+i*layer.n*size);
|
||||
col2im_ongpu(c, l.n, out_h, out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.n*size);
|
||||
}
|
||||
add_bias_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, size);
|
||||
activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation);
|
||||
if (l.batch_normalize) {
|
||||
forward_batchnorm_layer_gpu(l, state);
|
||||
} else {
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
}
|
||||
activate_array_ongpu(l.output_gpu, l.batch*l.n*size, l.activation);
|
||||
}
|
||||
|
||||
extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state)
|
||||
extern "C" void backward_deconvolutional_layer_gpu(layer l, network_state state)
|
||||
{
|
||||
float alpha = 1./layer.batch;
|
||||
int out_h = deconvolutional_out_height(layer);
|
||||
int out_w = deconvolutional_out_width(layer);
|
||||
int out_h = l.out_h;
|
||||
int out_w = l.out_w;
|
||||
int size = out_h*out_w;
|
||||
int i;
|
||||
|
||||
gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu);
|
||||
backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size);
|
||||
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
|
||||
|
||||
if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float));
|
||||
if(l.batch_normalize){
|
||||
backward_batchnorm_layer_gpu(l, state);
|
||||
} else {
|
||||
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
}
|
||||
|
||||
for(i = 0; i < layer.batch; ++i){
|
||||
int m = layer.c;
|
||||
int n = layer.size*layer.size*layer.n;
|
||||
int k = layer.h*layer.w;
|
||||
//if(state.delta) memset(state.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));
|
||||
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
int m = l.c;
|
||||
int n = l.size*l.size*l.n;
|
||||
int k = l.h*l.w;
|
||||
|
||||
float *a = state.input + i*m*n;
|
||||
float *b = layer.col_image_gpu;
|
||||
float *c = layer.weight_updates_gpu;
|
||||
float *b = state.workspace;
|
||||
float *c = l.weight_updates_gpu;
|
||||
|
||||
im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w,
|
||||
layer.size, layer.stride, 0, b);
|
||||
gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
|
||||
im2col_ongpu(l.delta_gpu + i*l.n*size, l.n, out_h, out_w,
|
||||
l.size, l.stride, l.pad, b);
|
||||
gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
|
||||
|
||||
if(state.delta){
|
||||
int m = layer.c;
|
||||
int n = layer.h*layer.w;
|
||||
int k = layer.size*layer.size*layer.n;
|
||||
int m = l.c;
|
||||
int n = l.h*l.w;
|
||||
int k = l.size*l.size*l.n;
|
||||
|
||||
float *a = layer.weights_gpu;
|
||||
float *b = layer.col_image_gpu;
|
||||
float *a = l.weights_gpu;
|
||||
float *b = state.workspace;
|
||||
float *c = state.delta + i*n*m;
|
||||
|
||||
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void pull_deconvolutional_layer(deconvolutional_layer layer)
|
||||
extern "C" void pull_deconvolutional_layer(layer l)
|
||||
{
|
||||
cuda_pull_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size);
|
||||
cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
|
||||
cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size);
|
||||
cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
||||
cuda_pull_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size);
|
||||
cuda_pull_array(l.biases_gpu, l.biases, l.n);
|
||||
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size);
|
||||
cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
||||
if (l.batch_normalize){
|
||||
cuda_pull_array(l.scales_gpu, l.scales, l.n);
|
||||
cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
|
||||
cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void push_deconvolutional_layer(deconvolutional_layer layer)
|
||||
extern "C" void push_deconvolutional_layer(layer l)
|
||||
{
|
||||
cuda_push_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size);
|
||||
cuda_push_array(layer.biases_gpu, layer.biases, layer.n);
|
||||
cuda_push_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size);
|
||||
cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
|
||||
cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size);
|
||||
cuda_push_array(l.biases_gpu, l.biases, l.n);
|
||||
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size);
|
||||
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
||||
if (l.batch_normalize){
|
||||
cuda_push_array(l.scales_gpu, l.scales, l.n);
|
||||
cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
|
||||
cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay)
|
||||
void update_deconvolutional_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay)
|
||||
{
|
||||
int size = layer.size*layer.size*layer.c*layer.n;
|
||||
int size = l.size*l.size*l.c*l.n;
|
||||
axpy_ongpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
|
||||
scal_ongpu(l.n, momentum, l.bias_updates_gpu, 1);
|
||||
|
||||
axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
|
||||
scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1);
|
||||
if(l.scales_gpu){
|
||||
axpy_ongpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
|
||||
scal_ongpu(l.n, momentum, l.scale_updates_gpu, 1);
|
||||
}
|
||||
|
||||
axpy_ongpu(size, -decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
|
||||
axpy_ongpu(size, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
|
||||
scal_ongpu(size, momentum, layer.weight_updates_gpu, 1);
|
||||
axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
|
||||
axpy_ongpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
|
||||
scal_ongpu(size, momentum, l.weight_updates_gpu, 1);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "deconvolutional_layer.h"
|
||||
#include "convolutional_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "utils.h"
|
||||
#include "im2col.h"
|
||||
#include "col2im.h"
|
||||
@ -8,45 +9,25 @@
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
int deconvolutional_out_height(deconvolutional_layer l)
|
||||
{
|
||||
int h = l.stride*(l.h - 1) + l.size;
|
||||
return h;
|
||||
|
||||
static size_t get_workspace_size(layer l){
|
||||
return (size_t)l.h*l.w*l.size*l.size*l.c*sizeof(float);
|
||||
}
|
||||
|
||||
int deconvolutional_out_width(deconvolutional_layer l)
|
||||
int deconvolutional_out_height(layer l)
|
||||
{
|
||||
int w = l.stride*(l.w - 1) + l.size;
|
||||
return w;
|
||||
return (l.h) * l.stride + l.size/2 - l.pad;
|
||||
}
|
||||
|
||||
int deconvolutional_out_size(deconvolutional_layer l)
|
||||
int deconvolutional_out_width(layer l)
|
||||
{
|
||||
return deconvolutional_out_height(l) * deconvolutional_out_width(l);
|
||||
return (l.w) * l.stride + l.size/2 - l.pad;
|
||||
}
|
||||
|
||||
image get_deconvolutional_image(deconvolutional_layer l)
|
||||
{
|
||||
int h,w,c;
|
||||
h = deconvolutional_out_height(l);
|
||||
w = deconvolutional_out_width(l);
|
||||
c = l.n;
|
||||
return float_to_image(w,h,c,l.output);
|
||||
}
|
||||
|
||||
image get_deconvolutional_delta(deconvolutional_layer l)
|
||||
{
|
||||
int h,w,c;
|
||||
h = deconvolutional_out_height(l);
|
||||
w = deconvolutional_out_width(l);
|
||||
c = l.n;
|
||||
return float_to_image(w,h,c,l.delta);
|
||||
}
|
||||
|
||||
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
|
||||
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, int batch_normalize)
|
||||
{
|
||||
int i;
|
||||
deconvolutional_layer l = {0};
|
||||
layer l = {0};
|
||||
l.type = DECONVOLUTIONAL;
|
||||
|
||||
l.h = h;
|
||||
@ -67,71 +48,135 @@ deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c,
|
||||
for(i = 0; i < n; ++i){
|
||||
l.biases[i] = scale;
|
||||
}
|
||||
int out_h = deconvolutional_out_height(l);
|
||||
int out_w = deconvolutional_out_width(l);
|
||||
l.pad = l.size/2;
|
||||
|
||||
l.out_h = out_h;
|
||||
l.out_w = out_w;
|
||||
l.out_h = (l.h) * l.stride + l.size/2 - l.pad;
|
||||
l.out_w = (l.w) * l.stride + l.size/2 - l.pad;
|
||||
l.out_c = n;
|
||||
l.outputs = l.out_w * l.out_h * l.out_c;
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
|
||||
l.col_image = calloc(h*w*size*size*n, sizeof(float));
|
||||
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
l.output = calloc(l.batch*l.out_h * l.out_w * n, sizeof(float));
|
||||
l.delta = calloc(l.batch*l.out_h * l.out_w * n, sizeof(float));
|
||||
|
||||
l.forward = forward_deconvolutional_layer;
|
||||
l.backward = backward_deconvolutional_layer;
|
||||
l.update = update_deconvolutional_layer;
|
||||
|
||||
#ifdef GPU
|
||||
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
|
||||
l.batch_normalize = batch_normalize;
|
||||
|
||||
l.biases_gpu = cuda_make_array(l.biases, n);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
||||
if(batch_normalize){
|
||||
l.scales = calloc(n, sizeof(float));
|
||||
l.scale_updates = calloc(n, sizeof(float));
|
||||
for(i = 0; i < n; ++i){
|
||||
l.scales[i] = 1;
|
||||
}
|
||||
|
||||
l.col_image_gpu = cuda_make_array(l.col_image, h*w*size*size*n);
|
||||
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
|
||||
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
l.mean = calloc(n, sizeof(float));
|
||||
l.variance = calloc(n, sizeof(float));
|
||||
|
||||
l.mean_delta = calloc(n, sizeof(float));
|
||||
l.variance_delta = calloc(n, sizeof(float));
|
||||
|
||||
l.rolling_mean = calloc(n, sizeof(float));
|
||||
l.rolling_variance = calloc(n, sizeof(float));
|
||||
l.x = calloc(l.batch*l.outputs, sizeof(float));
|
||||
l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_deconvolutional_layer_gpu;
|
||||
l.backward_gpu = backward_deconvolutional_layer_gpu;
|
||||
l.update_gpu = update_deconvolutional_layer_gpu;
|
||||
|
||||
if(gpu_index >= 0){
|
||||
|
||||
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
|
||||
|
||||
l.biases_gpu = cuda_make_array(l.biases, n);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n);
|
||||
l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
|
||||
|
||||
if(batch_normalize){
|
||||
l.mean_gpu = cuda_make_array(l.mean, n);
|
||||
l.variance_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.rolling_mean_gpu = cuda_make_array(l.mean, n);
|
||||
l.rolling_variance_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.mean_delta_gpu = cuda_make_array(l.mean, n);
|
||||
l.variance_delta_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.scales_gpu = cuda_make_array(l.scales, n);
|
||||
l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
|
||||
|
||||
l.x_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
|
||||
l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
|
||||
}
|
||||
}
|
||||
#ifdef CUDNN
|
||||
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||
cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
l.activation = activation;
|
||||
l.workspace_size = get_workspace_size(l);
|
||||
|
||||
fprintf(stderr, "Deconvolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
|
||||
fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
void resize_deconvolutional_layer(deconvolutional_layer *l, int h, int w)
|
||||
void resize_deconvolutional_layer(layer *l, int h, int w)
|
||||
{
|
||||
l->h = h;
|
||||
l->w = w;
|
||||
int out_h = deconvolutional_out_height(*l);
|
||||
int out_w = deconvolutional_out_width(*l);
|
||||
l->out_h = (l->h) * l->stride + l->size/2 - l->pad;
|
||||
l->out_w = (l->w) * l->stride + l->size/2 - l->pad;
|
||||
|
||||
l->col_image = realloc(l->col_image,
|
||||
out_h*out_w*l->size*l->size*l->c*sizeof(float));
|
||||
l->output = realloc(l->output,
|
||||
l->batch*out_h * out_w * l->n*sizeof(float));
|
||||
l->delta = realloc(l->delta,
|
||||
l->batch*out_h * out_w * l->n*sizeof(float));
|
||||
#ifdef GPU
|
||||
cuda_free(l->col_image_gpu);
|
||||
l->outputs = l->out_h * l->out_w * l->out_c;
|
||||
l->inputs = l->w * l->h * l->c;
|
||||
|
||||
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
|
||||
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
|
||||
if(l->batch_normalize){
|
||||
l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
|
||||
l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
cuda_free(l->delta_gpu);
|
||||
cuda_free(l->output_gpu);
|
||||
|
||||
l->col_image_gpu = cuda_make_array(l->col_image, out_h*out_w*l->size*l->size*l->c);
|
||||
l->delta_gpu = cuda_make_array(l->delta, l->batch*out_h*out_w*l->n);
|
||||
l->output_gpu = cuda_make_array(l->output, l->batch*out_h*out_w*l->n);
|
||||
l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
|
||||
l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
|
||||
if(l->batch_normalize){
|
||||
cuda_free(l->x_gpu);
|
||||
cuda_free(l->x_norm_gpu);
|
||||
|
||||
l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
}
|
||||
#ifdef CUDNN
|
||||
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
|
||||
#endif
|
||||
#endif
|
||||
l->workspace_size = get_workspace_size(*l);
|
||||
}
|
||||
|
||||
void forward_deconvolutional_layer(const deconvolutional_layer l, network_state state)
|
||||
void forward_deconvolutional_layer(const layer l, network_state state)
|
||||
{
|
||||
int i;
|
||||
int out_h = deconvolutional_out_height(l);
|
||||
int out_w = deconvolutional_out_width(l);
|
||||
int out_h = l.out_h;
|
||||
int out_w = l.out_w;
|
||||
int size = out_h*out_w;
|
||||
|
||||
int m = l.size*l.size*l.n;
|
||||
@ -143,17 +188,22 @@ void forward_deconvolutional_layer(const deconvolutional_layer l, network_state
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
float *a = l.weights;
|
||||
float *b = state.input + i*l.c*l.h*l.w;
|
||||
float *c = l.col_image;
|
||||
float *c = state.workspace;
|
||||
|
||||
gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);
|
||||
|
||||
col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, l.output+i*l.n*size);
|
||||
}
|
||||
add_bias(l.output, l.biases, l.batch, l.n, size);
|
||||
|
||||
if(l.batch_normalize){
|
||||
forward_batchnorm_layer(l, state);
|
||||
} else {
|
||||
add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
|
||||
}
|
||||
activate_array(l.output, l.batch*l.n*size, l.activation);
|
||||
}
|
||||
|
||||
void backward_deconvolutional_layer(deconvolutional_layer l, network_state state)
|
||||
void backward_deconvolutional_layer(layer l, network_state state)
|
||||
{
|
||||
float alpha = 1./l.batch;
|
||||
int out_h = deconvolutional_out_height(l);
|
||||
@ -162,7 +212,11 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
|
||||
int i;
|
||||
|
||||
gradient_array(l.output, size*l.n*l.batch, l.activation, l.delta);
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.n, size);
|
||||
if(l.batch_normalize){
|
||||
backward_batchnorm_layer(l, state);
|
||||
} else {
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
|
||||
}
|
||||
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
int m = l.c;
|
||||
@ -170,7 +224,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
|
||||
int k = l.h*l.w;
|
||||
|
||||
float *a = state.input + i*m*n;
|
||||
float *b = l.col_image;
|
||||
float *b = state.workspace;
|
||||
float *c = l.weight_updates;
|
||||
|
||||
im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
|
||||
@ -183,7 +237,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
|
||||
int k = l.size*l.size*l.n;
|
||||
|
||||
float *a = l.weights;
|
||||
float *b = l.col_image;
|
||||
float *b = state.workspace;
|
||||
float *c = state.delta + i*n*m;
|
||||
|
||||
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
@ -191,14 +245,19 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
|
||||
}
|
||||
}
|
||||
|
||||
void update_deconvolutional_layer(deconvolutional_layer l, float learning_rate, float momentum, float decay)
|
||||
void update_deconvolutional_layer(layer l, int batch, float learning_rate, float momentum, float decay)
|
||||
{
|
||||
int size = l.size*l.size*l.c*l.n;
|
||||
axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1);
|
||||
axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
|
||||
scal_cpu(l.n, momentum, l.bias_updates, 1);
|
||||
|
||||
axpy_cpu(size, -decay, l.weights, 1, l.weight_updates, 1);
|
||||
axpy_cpu(size, learning_rate, l.weight_updates, 1, l.weights, 1);
|
||||
if(l.scales){
|
||||
axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
|
||||
scal_cpu(l.n, momentum, l.scale_updates, 1);
|
||||
}
|
||||
|
||||
axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
|
||||
axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
|
||||
scal_cpu(size, momentum, l.weight_updates, 1);
|
||||
}
|
||||
|
||||
|
@ -7,28 +7,19 @@
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
|
||||
typedef layer deconvolutional_layer;
|
||||
|
||||
#ifdef GPU
|
||||
void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
|
||||
void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state);
|
||||
void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
|
||||
void push_deconvolutional_layer(deconvolutional_layer layer);
|
||||
void pull_deconvolutional_layer(deconvolutional_layer layer);
|
||||
void forward_deconvolutional_layer_gpu(layer l, network_state state);
|
||||
void backward_deconvolutional_layer_gpu(layer l, network_state state);
|
||||
void update_deconvolutional_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
void push_deconvolutional_layer(layer l);
|
||||
void pull_deconvolutional_layer(layer l);
|
||||
#endif
|
||||
|
||||
deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation);
|
||||
void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w);
|
||||
void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state);
|
||||
void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay);
|
||||
void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state);
|
||||
|
||||
image get_deconvolutional_image(deconvolutional_layer layer);
|
||||
image get_deconvolutional_delta(deconvolutional_layer layer);
|
||||
image get_deconvolutional_filter(deconvolutional_layer layer, int i);
|
||||
|
||||
int deconvolutional_out_height(deconvolutional_layer layer);
|
||||
int deconvolutional_out_width(deconvolutional_layer layer);
|
||||
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, int batch_normalize);
|
||||
void resize_deconvolutional_layer(layer *l, int h, int w);
|
||||
void forward_deconvolutional_layer(const layer l, network_state state);
|
||||
void update_deconvolutional_layer(layer l, int batch, float learning_rate, float momentum, float decay);
|
||||
void backward_deconvolutional_layer(layer l, network_state state);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -58,7 +58,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
|
||||
int index = b*l.inputs;
|
||||
for (i = 0; i < locations; ++i) {
|
||||
int offset = i*l.classes;
|
||||
softmax(l.output + index + offset, l.classes, 1,
|
||||
softmax(l.output + index + offset, l.classes, 1, 1,
|
||||
l.output + index + offset);
|
||||
}
|
||||
}
|
||||
@ -101,13 +101,13 @@ void forward_detection_layer(const detection_layer l, network_state state)
|
||||
avg_allcat += l.output[class_index+j];
|
||||
}
|
||||
|
||||
box truth = float_to_box(state.truth + truth_index + 1 + l.classes);
|
||||
box truth = float_to_box(state.truth + truth_index + 1 + l.classes, 1);
|
||||
truth.x /= l.side;
|
||||
truth.y /= l.side;
|
||||
|
||||
for(j = 0; j < l.n; ++j){
|
||||
int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords;
|
||||
box out = float_to_box(l.output + box_index);
|
||||
box out = float_to_box(l.output + box_index, 1);
|
||||
out.x /= l.side;
|
||||
out.y /= l.side;
|
||||
|
||||
@ -146,7 +146,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
|
||||
int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
|
||||
int tbox_index = truth_index + 1 + l.classes;
|
||||
|
||||
box out = float_to_box(l.output + box_index);
|
||||
box out = float_to_box(l.output + box_index, 1);
|
||||
out.x /= l.side;
|
||||
out.y /= l.side;
|
||||
if (l.sqrt) {
|
||||
|
202
src/detector.c
202
src/detector.c
@ -6,6 +6,7 @@
|
||||
#include "box.h"
|
||||
#include "demo.h"
|
||||
#include "option_list.h"
|
||||
#include "blas.h"
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
@ -103,21 +104,28 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
||||
load_thread = load_data(args);
|
||||
|
||||
/*
|
||||
int k;
|
||||
for(k = 0; k < l.max_boxes; ++k){
|
||||
box b = float_to_box(train.y.vals[10] + 1 + k*5);
|
||||
if(!b.x) break;
|
||||
printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
|
||||
}
|
||||
image im = float_to_image(448, 448, 3, train.X.vals[10]);
|
||||
int k;
|
||||
for(k = 0; k < l.max_boxes; ++k){
|
||||
box b = float_to_box(train.y.vals[10] + 1 + k*5);
|
||||
printf("%d %d %d %d\n", truth.x, truth.y, truth.w, truth.h);
|
||||
draw_bbox(im, b, 8, 1,0,0);
|
||||
}
|
||||
save_image(im, "truth11");
|
||||
*/
|
||||
int k;
|
||||
for(k = 0; k < l.max_boxes; ++k){
|
||||
box b = float_to_box(train.y.vals[10] + 1 + k*5);
|
||||
if(!b.x) break;
|
||||
printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
|
||||
}
|
||||
*/
|
||||
/*
|
||||
int zz;
|
||||
for(zz = 0; zz < train.X.cols; ++zz){
|
||||
image im = float_to_image(net.w, net.h, 3, train.X.vals[zz]);
|
||||
int k;
|
||||
for(k = 0; k < l.max_boxes; ++k){
|
||||
box b = float_to_box(train.y.vals[zz] + k*5);
|
||||
printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
|
||||
draw_bbox(im, b, 1, 1,0,0);
|
||||
}
|
||||
show_image(im, "truth11");
|
||||
cvWaitKey(0);
|
||||
save_image(im, "truth11");
|
||||
}
|
||||
*/
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
|
||||
@ -192,13 +200,13 @@ void print_detector_detections(FILE **fps, char *id, box *boxes, float **probs,
|
||||
{
|
||||
int i, j;
|
||||
for(i = 0; i < total; ++i){
|
||||
float xmin = boxes[i].x - boxes[i].w/2.;
|
||||
float xmax = boxes[i].x + boxes[i].w/2.;
|
||||
float ymin = boxes[i].y - boxes[i].h/2.;
|
||||
float ymax = boxes[i].y + boxes[i].h/2.;
|
||||
float xmin = boxes[i].x - boxes[i].w/2. + 1;
|
||||
float xmax = boxes[i].x + boxes[i].w/2. + 1;
|
||||
float ymin = boxes[i].y - boxes[i].h/2. + 1;
|
||||
float ymax = boxes[i].y + boxes[i].h/2. + 1;
|
||||
|
||||
if (xmin < 0) xmin = 0;
|
||||
if (ymin < 0) ymin = 0;
|
||||
if (xmin < 1) xmin = 1;
|
||||
if (ymin < 1) ymin = 1;
|
||||
if (xmax > w) xmax = w;
|
||||
if (ymax > h) ymax = h;
|
||||
|
||||
@ -231,6 +239,142 @@ void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int
|
||||
}
|
||||
}
|
||||
|
||||
void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
int j;
|
||||
list *options = read_data_cfg(datacfg);
|
||||
char *valid_images = option_find_str(options, "valid", "data/train.list");
|
||||
char *name_list = option_find_str(options, "names", "data/names.list");
|
||||
char *prefix = option_find_str(options, "results", "results");
|
||||
char **names = get_labels(name_list);
|
||||
char *mapf = option_find_str(options, "map", 0);
|
||||
int *map = 0;
|
||||
if (mapf) map = read_map(mapf);
|
||||
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 2);
|
||||
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
srand(time(0));
|
||||
|
||||
list *plist = get_paths(valid_images);
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
|
||||
layer l = net.layers[net.n-1];
|
||||
int classes = l.classes;
|
||||
|
||||
char buff[1024];
|
||||
char *type = option_find_str(options, "eval", "voc");
|
||||
FILE *fp = 0;
|
||||
FILE **fps = 0;
|
||||
int coco = 0;
|
||||
int imagenet = 0;
|
||||
if(0==strcmp(type, "coco")){
|
||||
if(!outfile) outfile = "coco_results";
|
||||
snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
|
||||
fp = fopen(buff, "w");
|
||||
fprintf(fp, "[\n");
|
||||
coco = 1;
|
||||
} else if(0==strcmp(type, "imagenet")){
|
||||
if(!outfile) outfile = "imagenet-detection";
|
||||
snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
|
||||
fp = fopen(buff, "w");
|
||||
imagenet = 1;
|
||||
classes = 200;
|
||||
} else {
|
||||
if(!outfile) outfile = "comp4_det_test_";
|
||||
fps = calloc(classes, sizeof(FILE *));
|
||||
for(j = 0; j < classes; ++j){
|
||||
snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
|
||||
fps[j] = fopen(buff, "w");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
|
||||
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
|
||||
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
int t;
|
||||
|
||||
float thresh = .005;
|
||||
float nms = .45;
|
||||
|
||||
int nthreads = 4;
|
||||
image *val = calloc(nthreads, sizeof(image));
|
||||
image *val_resized = calloc(nthreads, sizeof(image));
|
||||
image *buf = calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
|
||||
|
||||
image input = make_image(net.w, net.h, net.c*2);
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
//args.type = IMAGE_DATA;
|
||||
args.type = LETTERBOX_DATA;
|
||||
|
||||
for(t = 0; t < nthreads; ++t){
|
||||
args.path = paths[i+t];
|
||||
args.im = &buf[t];
|
||||
args.resized = &buf_resized[t];
|
||||
thr[t] = load_data_in_thread(args);
|
||||
}
|
||||
time_t start = time(0);
|
||||
for(i = nthreads; i < m+nthreads; i += nthreads){
|
||||
fprintf(stderr, "%d\n", i);
|
||||
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
|
||||
pthread_join(thr[t], 0);
|
||||
val[t] = buf[t];
|
||||
val_resized[t] = buf_resized[t];
|
||||
}
|
||||
for(t = 0; t < nthreads && i+t < m; ++t){
|
||||
args.path = paths[i+t];
|
||||
args.im = &buf[t];
|
||||
args.resized = &buf_resized[t];
|
||||
thr[t] = load_data_in_thread(args);
|
||||
}
|
||||
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
|
||||
char *path = paths[i+t-nthreads];
|
||||
char *id = basecfg(path);
|
||||
copy_cpu(net.w*net.h*net.c, val_resized[t].data, 1, input.data, 1);
|
||||
flip_image(val_resized[t]);
|
||||
copy_cpu(net.w*net.h*net.c, val_resized[t].data, 1, input.data + net.w*net.h*net.c, 1);
|
||||
|
||||
network_predict(net, input.data);
|
||||
int w = val[t].w;
|
||||
int h = val[t].h;
|
||||
get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5);
|
||||
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
|
||||
if (coco){
|
||||
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
|
||||
} else if (imagenet){
|
||||
print_imagenet_detections(fp, i+t-nthreads+1, boxes, probs, l.w*l.h*l.n, classes, w, h);
|
||||
} else {
|
||||
print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h);
|
||||
}
|
||||
free(id);
|
||||
free_image(val[t]);
|
||||
free_image(val_resized[t]);
|
||||
}
|
||||
}
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(fps) fclose(fps[j]);
|
||||
}
|
||||
if(coco){
|
||||
fseek(fp, -2, SEEK_CUR);
|
||||
fprintf(fp, "\n]\n");
|
||||
fclose(fp);
|
||||
}
|
||||
fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
|
||||
}
|
||||
|
||||
|
||||
void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
int j;
|
||||
@ -306,7 +450,8 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.type = IMAGE_DATA;
|
||||
//args.type = IMAGE_DATA;
|
||||
args.type = LETTERBOX_DATA;
|
||||
|
||||
for(t = 0; t < nthreads; ++t){
|
||||
args.path = paths[i+t];
|
||||
@ -467,7 +612,11 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input,0,0);
|
||||
image sized = resize_image(im, net.w, net.h);
|
||||
image sized = letterbox_image(im, net.w, net.h);
|
||||
//image sized = resize_image(im, net.w, net.h);
|
||||
//image sized2 = resize_max(im, net.w);
|
||||
//image sized = crop_image(sized2, -((net.w - sized2.w)/2), -((net.h - sized2.h)/2), net.w, net.h);
|
||||
//resize_network(&net, sized.w, sized.h);
|
||||
layer l = net.layers[net.n-1];
|
||||
|
||||
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
|
||||
@ -481,9 +630,9 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
||||
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);
|
||||
if (l.softmax_tree && nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
|
||||
else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
|
||||
draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
|
||||
save_image(im, "predictions");
|
||||
show_image(im, "predictions");
|
||||
draw_detections(sized, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
|
||||
save_image(sized, "predictions");
|
||||
show_image(sized, "predictions");
|
||||
|
||||
free_image(im);
|
||||
free_image(sized);
|
||||
@ -541,6 +690,7 @@ void run_detector(int argc, char **argv)
|
||||
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh);
|
||||
else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
|
||||
else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
|
||||
else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "demo")) {
|
||||
list *options = read_data_cfg(datacfg);
|
||||
|
326
src/go.c
326
src/go.c
@ -3,6 +3,8 @@
|
||||
#include "parser.h"
|
||||
#include "option_list.h"
|
||||
#include "blas.h"
|
||||
#include "data.h"
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
@ -10,7 +12,7 @@
|
||||
|
||||
int inverted = 1;
|
||||
int noi = 1;
|
||||
static const int nind = 5;
|
||||
static const int nind = 2;
|
||||
|
||||
typedef struct {
|
||||
char **data;
|
||||
@ -88,22 +90,30 @@ void board_to_string(char *s, float *board)
|
||||
}
|
||||
}
|
||||
|
||||
void random_go_moves(moves m, float *boards, float *labels, int n)
|
||||
data random_go_moves(moves m, int n)
|
||||
{
|
||||
data d = {0};
|
||||
d.X = make_matrix(n, 19*19);
|
||||
d.y = make_matrix(n, 19*19+1);
|
||||
int i;
|
||||
memset(labels, 0, 19*19*n*sizeof(float));
|
||||
for(i = 0; i < n; ++i){
|
||||
float *board = d.X.vals[i];
|
||||
float *label = d.y.vals[i];
|
||||
char *b = m.data[rand()%m.n];
|
||||
int row = b[0];
|
||||
int col = b[1];
|
||||
labels[col + 19*(row + i*19)] = 1;
|
||||
string_to_board(b+2, boards+i*19*19);
|
||||
boards[col + 19*(row + i*19)] = 0;
|
||||
if(row >= 19 || col >= 19){
|
||||
label[19*19] = 1;
|
||||
} else {
|
||||
label[col + 19*row] = 1;
|
||||
string_to_board(b+2, board);
|
||||
if(board[col + 19*row]) printf("hey\n");
|
||||
}
|
||||
|
||||
int flip = rand()%2;
|
||||
int rotate = rand()%4;
|
||||
image in = float_to_image(19, 19, 1, boards+i*19*19);
|
||||
image out = float_to_image(19, 19, 1, labels+i*19*19);
|
||||
image in = float_to_image(19, 19, 1, board);
|
||||
image out = float_to_image(19, 19, 1, label);
|
||||
if(flip){
|
||||
flip_image(in);
|
||||
flip_image(out);
|
||||
@ -111,36 +121,60 @@ void random_go_moves(moves m, float *boards, float *labels, int n)
|
||||
rotate_image_cw(in, rotate);
|
||||
rotate_image_cw(out, rotate);
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
void train_go(char *cfgfile, char *weightfile)
|
||||
void train_go(char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
|
||||
{
|
||||
srand(time(0));
|
||||
int i;
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
printf("%d\n", ngpus);
|
||||
network *nets = calloc(ngpus, sizeof(network));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
srand(seed);
|
||||
#ifdef GPU
|
||||
cuda_set_device(gpus[i]);
|
||||
#endif
|
||||
nets[i] = load_network(cfgfile, weightfile, clear);
|
||||
nets[i].learning_rate *= ngpus;
|
||||
}
|
||||
network net = nets[0];
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
|
||||
char buff[256];
|
||||
float *board = calloc(19*19*net.batch, sizeof(float));
|
||||
float *move = calloc(19*19*net.batch, sizeof(float));
|
||||
moves m = load_go_moves("/home/pjreddie/backup/go.train");
|
||||
//moves m = load_go_moves("games.txt");
|
||||
|
||||
int N = m.n;
|
||||
printf("Moves: %d\n", N);
|
||||
int epoch = (*net.seen)/N;
|
||||
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
||||
clock_t time=clock();
|
||||
|
||||
random_go_moves(m, board, move, net.batch);
|
||||
float loss = train_network_datum(net, board, move) / net.batch;
|
||||
data train = random_go_moves(m, net.batch*net.subdivisions*ngpus);
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
|
||||
float loss = 0;
|
||||
#ifdef GPU
|
||||
if(ngpus == 1){
|
||||
loss = train_network(net, train);
|
||||
} else {
|
||||
loss = train_networks(nets, ngpus, train, 4);
|
||||
}
|
||||
#else
|
||||
loss = train_network(net, train);
|
||||
#endif
|
||||
free_data(train);
|
||||
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.95 + loss*.05;
|
||||
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
||||
@ -151,7 +185,7 @@ void train_go(char *cfgfile, char *weightfile)
|
||||
save_weights(net, buff);
|
||||
|
||||
}
|
||||
if(get_current_batch(net)%100 == 0){
|
||||
if(get_current_batch(net)%1000 == 0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup",backup_directory,base);
|
||||
save_weights(net, buff);
|
||||
@ -204,12 +238,9 @@ int *calculate_liberties(float *board)
|
||||
return lib;
|
||||
}
|
||||
|
||||
void print_board(float *board, int swap, int *indexes)
|
||||
void print_board(FILE *stream, float *board, int swap, int *indexes)
|
||||
{
|
||||
//FILE *stream = stdout;
|
||||
FILE *stream = stderr;
|
||||
int i,j,n;
|
||||
fprintf(stream, "\n\n");
|
||||
fprintf(stream, " ");
|
||||
for(i = 0; i < 19; ++i){
|
||||
fprintf(stream, "%c ", 'A' + i + 1*(i > 7 && noi));
|
||||
@ -225,12 +256,12 @@ void print_board(float *board, int swap, int *indexes)
|
||||
if(index == indexes[n]){
|
||||
found = 1;
|
||||
/*
|
||||
if(n == 0) fprintf(stream, "\uff11");
|
||||
else if(n == 1) fprintf(stream, "\uff12");
|
||||
else if(n == 2) fprintf(stream, "\uff13");
|
||||
else if(n == 3) fprintf(stream, "\uff14");
|
||||
else if(n == 4) fprintf(stream, "\uff15");
|
||||
*/
|
||||
if(n == 0) fprintf(stream, "\uff11");
|
||||
else if(n == 1) fprintf(stream, "\uff12");
|
||||
else if(n == 2) fprintf(stream, "\uff13");
|
||||
else if(n == 3) fprintf(stream, "\uff14");
|
||||
else if(n == 4) fprintf(stream, "\uff15");
|
||||
*/
|
||||
if(n == 0) fprintf(stream, " 1");
|
||||
else if(n == 1) fprintf(stream, " 2");
|
||||
else if(n == 2) fprintf(stream, " 3");
|
||||
@ -261,7 +292,7 @@ void flip_board(float *board)
|
||||
void predict_move(network net, float *board, float *move, int multi)
|
||||
{
|
||||
float *output = network_predict(net, board);
|
||||
copy_cpu(19*19, output, 1, move, 1);
|
||||
copy_cpu(19*19+1, output, 1, move, 1);
|
||||
int i;
|
||||
if(multi){
|
||||
image bim = float_to_image(19, 19, 1, board);
|
||||
@ -275,12 +306,12 @@ void predict_move(network net, float *board, float *move, int multi)
|
||||
if(i >= 4) flip_image(oim);
|
||||
rotate_image_cw(oim, -i);
|
||||
|
||||
axpy_cpu(19*19, 1, output, 1, move, 1);
|
||||
axpy_cpu(19*19+1, 1, output, 1, move, 1);
|
||||
|
||||
if(i >= 4) flip_image(bim);
|
||||
rotate_image_cw(bim, -i);
|
||||
}
|
||||
scal_cpu(19*19, 1./8., move, 1);
|
||||
scal_cpu(19*19+1, 1./8., move, 1);
|
||||
}
|
||||
for(i = 0; i < 19*19; ++i){
|
||||
if(board[i]) move[i] = 0;
|
||||
@ -350,14 +381,24 @@ int legal_go(float *b, char *ko, int p, int r, int c)
|
||||
int generate_move(network net, int player, float *board, int multi, float thresh, float temp, char *ko, int print)
|
||||
{
|
||||
int i, j;
|
||||
int empty = 1;
|
||||
for(i = 0; i < 19*19; ++i){
|
||||
if (board[i]) {
|
||||
empty = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(empty) {
|
||||
return 72;
|
||||
}
|
||||
for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
|
||||
|
||||
float move[361];
|
||||
float move[362];
|
||||
if (player < 0) flip_board(board);
|
||||
predict_move(net, board, move, multi);
|
||||
if (player < 0) flip_board(board);
|
||||
|
||||
|
||||
|
||||
for(i = 0; i < 19; ++i){
|
||||
for(j = 0; j < 19; ++j){
|
||||
if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0;
|
||||
@ -365,40 +406,43 @@ int generate_move(network net, int player, float *board, int multi, float thresh
|
||||
}
|
||||
|
||||
int indexes[nind];
|
||||
top_k(move, 19*19, nind, indexes);
|
||||
top_k(move, 19*19+1, nind, indexes);
|
||||
if(thresh > move[indexes[0]]) thresh = move[indexes[nind-1]];
|
||||
|
||||
for(i = 0; i < 19; ++i){
|
||||
for(j = 0; j < 19; ++j){
|
||||
if (move[i*19 + j] < thresh) move[i*19 + j] = 0;
|
||||
}
|
||||
for(i = 0; i < 19*19+1; ++i){
|
||||
if (move[i] < thresh) move[i] = 0;
|
||||
}
|
||||
|
||||
|
||||
int max = max_index(move, 19*19);
|
||||
int max = max_index(move, 19*19+1);
|
||||
int row = max / 19;
|
||||
int col = max % 19;
|
||||
int index = sample_array(move, 19*19);
|
||||
int index = sample_array(move, 19*19+1);
|
||||
|
||||
if(print){
|
||||
top_k(move, 19*19, nind, indexes);
|
||||
top_k(move, 19*19+1, nind, indexes);
|
||||
for(i = 0; i < nind; ++i){
|
||||
if (!move[indexes[i]]) indexes[i] = -1;
|
||||
}
|
||||
print_board(board, player, indexes);
|
||||
print_board(stderr, board, player, indexes);
|
||||
for(i = 0; i < nind; ++i){
|
||||
fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]);
|
||||
}
|
||||
}
|
||||
if (row == 19) return -1;
|
||||
|
||||
if(suicide_go(board, player, row, col)){
|
||||
if (suicide_go(board, player, row, col)){
|
||||
return -1;
|
||||
}
|
||||
if(suicide_go(board, player, index/19, index%19)) index = max;
|
||||
|
||||
if (suicide_go(board, player, index/19, index%19)){
|
||||
index = max;
|
||||
}
|
||||
if (index == 19*19) return -1;
|
||||
return index;
|
||||
}
|
||||
|
||||
void valid_go(char *cfgfile, char *weightfile, int multi)
|
||||
void valid_go(char *cfgfile, char *weightfile, int multi, char *filename)
|
||||
{
|
||||
srand(time(0));
|
||||
char *base = basecfg(cfgfile);
|
||||
@ -411,8 +455,9 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
|
||||
float *board = calloc(19*19, sizeof(float));
|
||||
float *move = calloc(19*19, sizeof(float));
|
||||
moves m = load_go_moves("/home/pjreddie/backup/go.test");
|
||||
float *move = calloc(19*19+1, sizeof(float));
|
||||
// moves m = load_go_moves("/home/pjreddie/backup/go.test");
|
||||
moves m = load_go_moves(filename);
|
||||
|
||||
int N = m.n;
|
||||
int i;
|
||||
@ -430,6 +475,23 @@ void valid_go(char *cfgfile, char *weightfile, int multi)
|
||||
}
|
||||
}
|
||||
|
||||
int print_game(float *board, FILE *fp)
|
||||
{
|
||||
int i, j;
|
||||
int count = 3;
|
||||
fprintf(fp, "komi 6.5\n");
|
||||
fprintf(fp, "boardsize 19\n");
|
||||
fprintf(fp, "clear_board\n");
|
||||
for(j = 0; j < 19; ++j){
|
||||
for(i = 0; i < 19; ++i){
|
||||
if(board[j*19 + i] == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j);
|
||||
if(board[j*19 + i] == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j);
|
||||
if(board[j*19 + i]) ++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
void engine_go(char *filename, char *weightfile, int multi)
|
||||
{
|
||||
network net = parse_network_cfg(filename);
|
||||
@ -456,8 +518,12 @@ void engine_go(char *filename, char *weightfile, int multi)
|
||||
printf("=%s 2\n\n", ids);
|
||||
} else if (!strcmp(buff, "name")){
|
||||
printf("=%s DarkGo\n\n", ids);
|
||||
} else if (!strcmp(buff, "time_settings") || !strcmp(buff, "time_left")){
|
||||
char *line = fgetl(stdin);
|
||||
free(line);
|
||||
printf("=%s \n\n", ids);
|
||||
} else if (!strcmp(buff, "version")){
|
||||
printf("=%s 1.0\n\n", ids);
|
||||
printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids);
|
||||
} else if (!strcmp(buff, "known_command")){
|
||||
char comm[256];
|
||||
scanf("%s", comm);
|
||||
@ -472,11 +538,14 @@ void engine_go(char *filename, char *weightfile, int multi)
|
||||
!strcmp(comm, "komi") ||
|
||||
!strcmp(comm, "final_status_list") ||
|
||||
!strcmp(comm, "play") ||
|
||||
!strcmp(comm, "genmove_white") ||
|
||||
!strcmp(comm, "genmove_black") ||
|
||||
!strcmp(comm, "fixed_handicap") ||
|
||||
!strcmp(comm, "genmove"));
|
||||
if(known) printf("=%s true\n\n", ids);
|
||||
else printf("=%s false\n\n", ids);
|
||||
} else if (!strcmp(buff, "list_commands")){
|
||||
printf("=%s protocol_version\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove\nfinal_status_list\n\n", ids);
|
||||
printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids);
|
||||
} else if (!strcmp(buff, "quit")){
|
||||
break;
|
||||
} else if (!strcmp(buff, "boardsize")){
|
||||
@ -486,8 +555,17 @@ void engine_go(char *filename, char *weightfile, int multi)
|
||||
if(boardsize != 19){
|
||||
printf("?%s unacceptable size\n\n", ids);
|
||||
} else {
|
||||
memset(board, 0, 19*19*sizeof(float));
|
||||
printf("=%s \n\n", ids);
|
||||
}
|
||||
} else if (!strcmp(buff, "fixed_handicap")){
|
||||
int handicap = 0;
|
||||
scanf("%d", &handicap);
|
||||
int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294};
|
||||
int i;
|
||||
for(i = 0; i < handicap; ++i){
|
||||
board[indexes[i]] = 1;
|
||||
}
|
||||
} else if (!strcmp(buff, "clear_board")){
|
||||
passed = 0;
|
||||
memset(board, 0, 19*19*sizeof(float));
|
||||
@ -496,14 +574,24 @@ void engine_go(char *filename, char *weightfile, int multi)
|
||||
float komi = 0;
|
||||
scanf("%f", &komi);
|
||||
printf("=%s \n\n", ids);
|
||||
} else if (!strcmp(buff, "play")){
|
||||
} else if (!strcmp(buff, "showboard")){
|
||||
printf("=%s \n", ids);
|
||||
print_board(stdout, board, 1, 0);
|
||||
printf("\n");
|
||||
} else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){
|
||||
char color[256];
|
||||
scanf("%s ", color);
|
||||
if(!strcmp(buff, "play"))
|
||||
{
|
||||
scanf("%s ", color);
|
||||
} else {
|
||||
scanf(" ");
|
||||
color[0] = buff[0];
|
||||
}
|
||||
char c;
|
||||
int r;
|
||||
int count = scanf("%c%d", &c, &r);
|
||||
int player = (color[0] == 'b' || color[0] == 'B') ? 1 : -1;
|
||||
if(c == 'p' && count < 2) {
|
||||
if((c == 'p' || c == 'P') && count < 2) {
|
||||
passed = 1;
|
||||
printf("=%s \n\n", ids);
|
||||
char *line = fgetl(stdin);
|
||||
@ -527,13 +615,20 @@ void engine_go(char *filename, char *weightfile, int multi)
|
||||
board_to_string(one, board);
|
||||
|
||||
printf("=%s \n\n", ids);
|
||||
print_board(board, 1, 0);
|
||||
} else if (!strcmp(buff, "genmove")){
|
||||
char color[256];
|
||||
scanf("%s", color);
|
||||
int player = (color[0] == 'b' || color[0] == 'B') ? 1 : -1;
|
||||
//print_board(stderr, board, 1, 0);
|
||||
} else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){
|
||||
int player = 0;
|
||||
if(!strcmp(buff, "genmove")){
|
||||
char color[256];
|
||||
scanf("%s", color);
|
||||
player = (color[0] == 'b' || color[0] == 'B') ? 1 : -1;
|
||||
} else if (!strcmp(buff, "genmove_black")){
|
||||
player = 1;
|
||||
} else {
|
||||
player = -1;
|
||||
}
|
||||
|
||||
int index = generate_move(net, player, board, multi, .1, .7, two, 1);
|
||||
int index = generate_move(net, player, board, multi, .4, 1, two, 0);
|
||||
if(passed || index < 0){
|
||||
printf("=%s pass\n\n", ids);
|
||||
passed = 0;
|
||||
@ -550,7 +645,7 @@ void engine_go(char *filename, char *weightfile, int multi)
|
||||
row = 19 - row;
|
||||
if (col >= 8) ++col;
|
||||
printf("=%s %c%d\n\n", ids, 'A' + col, row);
|
||||
print_board(board, 1, 0);
|
||||
//print_board(board, 1, 0);
|
||||
}
|
||||
|
||||
} else if (!strcmp(buff, "p")){
|
||||
@ -562,19 +657,10 @@ void engine_go(char *filename, char *weightfile, int multi)
|
||||
char *line = fgetl(stdin);
|
||||
free(line);
|
||||
if(type[0] == 'd' || type[0] == 'D'){
|
||||
int i;
|
||||
FILE *f = fopen("game.txt", "w");
|
||||
int i, j;
|
||||
int count = 2;
|
||||
fprintf(f, "boardsize 19\n");
|
||||
fprintf(f, "clear_board\n");
|
||||
for(j = 0; j < 19; ++j){
|
||||
for(i = 0; i < 19; ++i){
|
||||
if(board[j*19 + i] == 1) fprintf(f, "play black %c%d\n", 'A'+i+(i>=8), 19-j);
|
||||
if(board[j*19 + i] == -1) fprintf(f, "play white %c%d\n", 'A'+i+(i>=8), 19-j);
|
||||
if(board[j*19 + i]) ++count;
|
||||
}
|
||||
}
|
||||
fprintf(f, "final_status_list dead\n");
|
||||
int count = print_game(board, f);
|
||||
fprintf(f, "%s final_status_list dead\n", ids);
|
||||
fclose(f);
|
||||
FILE *p = popen("./gnugo --mode gtp < game.txt", "r");
|
||||
for(i = 0; i < count; ++i){
|
||||
@ -608,44 +694,25 @@ void test_go(char *cfg, char *weights, int multi)
|
||||
srand(time(0));
|
||||
set_batch_network(&net, 1);
|
||||
float *board = calloc(19*19, sizeof(float));
|
||||
float *move = calloc(19*19, sizeof(float));
|
||||
float *move = calloc(19*19+1, sizeof(float));
|
||||
int color = 1;
|
||||
while(1){
|
||||
float *output = network_predict(net, board);
|
||||
copy_cpu(19*19, output, 1, move, 1);
|
||||
int i;
|
||||
if(multi){
|
||||
image bim = float_to_image(19, 19, 1, board);
|
||||
for(i = 1; i < 8; ++i){
|
||||
rotate_image_cw(bim, i);
|
||||
if(i >= 4) flip_image(bim);
|
||||
|
||||
float *output = network_predict(net, board);
|
||||
image oim = float_to_image(19, 19, 1, output);
|
||||
|
||||
if(i >= 4) flip_image(oim);
|
||||
rotate_image_cw(oim, -i);
|
||||
|
||||
axpy_cpu(19*19, 1, output, 1, move, 1);
|
||||
|
||||
if(i >= 4) flip_image(bim);
|
||||
rotate_image_cw(bim, -i);
|
||||
}
|
||||
scal_cpu(19*19, 1./8., move, 1);
|
||||
}
|
||||
for(i = 0; i < 19*19; ++i){
|
||||
if(board[i]) move[i] = 0;
|
||||
}
|
||||
predict_move(net, board, move, multi);
|
||||
|
||||
int indexes[nind];
|
||||
int row, col;
|
||||
top_k(move, 19*19, nind, indexes);
|
||||
print_board(board, color, indexes);
|
||||
top_k(move, 19*19+1, nind, indexes);
|
||||
print_board(stderr, board, color, indexes);
|
||||
for(i = 0; i < nind; ++i){
|
||||
int index = indexes[i];
|
||||
row = index / 19;
|
||||
col = index % 19;
|
||||
printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100);
|
||||
if(row == 19){
|
||||
printf("%d: Pass, %.2f%%\n", i+1, move[index]*100);
|
||||
} else {
|
||||
printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100);
|
||||
}
|
||||
}
|
||||
//if(color == 1) printf("\u25EF Enter move: ");
|
||||
//else printf("\u25C9 Enter move: ");
|
||||
@ -663,7 +730,9 @@ void test_go(char *cfg, char *weights, int multi)
|
||||
int index = indexes[picked];
|
||||
row = index / 19;
|
||||
col = index % 19;
|
||||
board[row*19 + col] = 1;
|
||||
if(row < 19){
|
||||
move_go(board, 1, row, col);
|
||||
}
|
||||
}
|
||||
} else if (cnum){
|
||||
if (c <= 'T' && c >= 'A'){
|
||||
@ -671,7 +740,7 @@ void test_go(char *cfg, char *weights, int multi)
|
||||
row = (inverted)?19 - row : row-1;
|
||||
col = c - 'A';
|
||||
if (col > 7 && noi) col -= 1;
|
||||
if (num == 2) board[row*19 + col] = 1;
|
||||
if (num == 2) move_go(board, 1, row, col);
|
||||
} else if (c == 'p') {
|
||||
// Pass
|
||||
} else if(c=='b' || c == 'w'){
|
||||
@ -698,19 +767,9 @@ void test_go(char *cfg, char *weights, int multi)
|
||||
|
||||
float score_game(float *board)
|
||||
{
|
||||
int i;
|
||||
FILE *f = fopen("game.txt", "w");
|
||||
int i, j;
|
||||
int count = 3;
|
||||
fprintf(f, "komi 6.5\n");
|
||||
fprintf(f, "boardsize 19\n");
|
||||
fprintf(f, "clear_board\n");
|
||||
for(j = 0; j < 19; ++j){
|
||||
for(i = 0; i < 19; ++i){
|
||||
if(board[j*19 + i] == 1) fprintf(f, "play black %c%d\n", 'A'+i+(i>=8), 19-j);
|
||||
if(board[j*19 + i] == -1) fprintf(f, "play white %c%d\n", 'A'+i+(i>=8), 19-j);
|
||||
if(board[j*19 + i]) ++count;
|
||||
}
|
||||
}
|
||||
int count = print_game(board, f);
|
||||
fprintf(f, "final_score\n");
|
||||
fclose(f);
|
||||
FILE *p = popen("./gnugo --mode gtp < game.txt", "r");
|
||||
@ -747,7 +806,7 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
|
||||
}
|
||||
}
|
||||
srand(time(0));
|
||||
char boards[300][93];
|
||||
char boards[600][93];
|
||||
int count = 0;
|
||||
set_batch_network(&net, 1);
|
||||
set_batch_network(&net2, 1);
|
||||
@ -760,13 +819,15 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
|
||||
int p2 = 0;
|
||||
int total = 0;
|
||||
while(1){
|
||||
if (done || count >= 300){
|
||||
if (done){
|
||||
float score = score_game(board);
|
||||
int i = (score > 0)? 0 : 1;
|
||||
if((score > 0) == (total%2==0)) ++p1;
|
||||
else ++p2;
|
||||
++total;
|
||||
fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total);
|
||||
sleep(1);
|
||||
/*
|
||||
int i = (score > 0)? 0 : 1;
|
||||
int j;
|
||||
for(; i < count; i += 2){
|
||||
for(j = 0; j < 93; ++j){
|
||||
@ -774,6 +835,7 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
*/
|
||||
memset(board, 0, 19*19*sizeof(float));
|
||||
player = 1;
|
||||
done = 0;
|
||||
@ -781,10 +843,10 @@ void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi)
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
}
|
||||
//print_board(board, 1, 0);
|
||||
print_board(stderr, board, 1, 0);
|
||||
//sleep(1);
|
||||
network use = ((total%2==0) == (player==1)) ? net : net2;
|
||||
int index = generate_move(use, player, board, multi, .1, .7, two, 0);
|
||||
int index = generate_move(use, player, board, multi, .4, 1, two, 0);
|
||||
if(index < 0){
|
||||
done = 1;
|
||||
continue;
|
||||
@ -818,13 +880,37 @@ void run_go(int argc, char **argv)
|
||||
return;
|
||||
}
|
||||
|
||||
char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
|
||||
int *gpus = 0;
|
||||
int gpu = 0;
|
||||
int ngpus = 0;
|
||||
if(gpu_list){
|
||||
printf("%s\n", gpu_list);
|
||||
int len = strlen(gpu_list);
|
||||
ngpus = 1;
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
if (gpu_list[i] == ',') ++ngpus;
|
||||
}
|
||||
gpus = calloc(ngpus, sizeof(int));
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
gpus[i] = atoi(gpu_list);
|
||||
gpu_list = strchr(gpu_list, ',')+1;
|
||||
}
|
||||
} else {
|
||||
gpu = gpu_index;
|
||||
gpus = &gpu;
|
||||
ngpus = 1;
|
||||
}
|
||||
int clear = find_arg(argc, argv, "-clear");
|
||||
|
||||
char *cfg = argv[3];
|
||||
char *weights = (argc > 4) ? argv[4] : 0;
|
||||
char *c2 = (argc > 5) ? argv[5] : 0;
|
||||
char *w2 = (argc > 6) ? argv[6] : 0;
|
||||
int multi = find_arg(argc, argv, "-multi");
|
||||
if(0==strcmp(argv[2], "train")) train_go(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi);
|
||||
if(0==strcmp(argv[2], "train")) train_go(cfg, weights, gpus, ngpus, clear);
|
||||
else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2);
|
||||
else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi);
|
||||
else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi);
|
||||
else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, multi);
|
||||
|
108
src/image.c
108
src/image.c
@ -613,6 +613,21 @@ image float_to_image(int w, int h, int c, float *data)
|
||||
return out;
|
||||
}
|
||||
|
||||
void place_image(image im, int w, int h, int dx, int dy, image canvas)
|
||||
{
|
||||
int x, y, c;
|
||||
for(c = 0; c < im.c; ++c){
|
||||
for(y = 0; y < h; ++y){
|
||||
for(x = 0; x < w; ++x){
|
||||
int rx = ((float)x / w) * im.w;
|
||||
int ry = ((float)y / h) * im.h;
|
||||
float val = bilinear_interpolate(im, rx, ry, c);
|
||||
set_pixel(canvas, x + dx, y + dy, c, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect)
|
||||
{
|
||||
@ -652,6 +667,12 @@ image rotate_image(image im, float rad)
|
||||
return rot;
|
||||
}
|
||||
|
||||
void fill_image(image m, float s)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s;
|
||||
}
|
||||
|
||||
void translate_image(image m, float s)
|
||||
{
|
||||
int i;
|
||||
@ -753,6 +774,27 @@ void composite_3d(char *f1, char *f2, char *out, int delta)
|
||||
#endif
|
||||
}
|
||||
|
||||
image letterbox_image(image im, int w, int h)
|
||||
{
|
||||
int new_w = im.w;
|
||||
int new_h = im.h;
|
||||
if (((float)w/im.w) < ((float)h/im.h)) {
|
||||
new_w = w;
|
||||
new_h = (im.h * w)/im.w;
|
||||
} else {
|
||||
new_h = h;
|
||||
new_w = (im.w * h)/im.h;
|
||||
}
|
||||
image resized = resize_image(im, new_w, new_h);
|
||||
image boxed = make_image(w, h, im.c);
|
||||
fill_image(boxed, .5);
|
||||
//int i;
|
||||
//for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0;
|
||||
embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2);
|
||||
free_image(resized);
|
||||
return boxed;
|
||||
}
|
||||
|
||||
image resize_max(image im, int max)
|
||||
{
|
||||
int w = im.w;
|
||||
@ -824,6 +866,52 @@ float three_way_min(float a, float b, float c)
|
||||
return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ;
|
||||
}
|
||||
|
||||
void yuv_to_rgb(image im)
|
||||
{
|
||||
assert(im.c == 3);
|
||||
int i, j;
|
||||
float r, g, b;
|
||||
float y, u, v;
|
||||
for(j = 0; j < im.h; ++j){
|
||||
for(i = 0; i < im.w; ++i){
|
||||
y = get_pixel(im, i , j, 0);
|
||||
u = get_pixel(im, i , j, 1);
|
||||
v = get_pixel(im, i , j, 2);
|
||||
|
||||
r = y + 1.13983*v;
|
||||
g = y + -.39465*u + -.58060*v;
|
||||
b = y + 2.03211*u;
|
||||
|
||||
set_pixel(im, i, j, 0, r);
|
||||
set_pixel(im, i, j, 1, g);
|
||||
set_pixel(im, i, j, 2, b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rgb_to_yuv(image im)
|
||||
{
|
||||
assert(im.c == 3);
|
||||
int i, j;
|
||||
float r, g, b;
|
||||
float y, u, v;
|
||||
for(j = 0; j < im.h; ++j){
|
||||
for(i = 0; i < im.w; ++i){
|
||||
r = get_pixel(im, i , j, 0);
|
||||
g = get_pixel(im, i , j, 1);
|
||||
b = get_pixel(im, i , j, 2);
|
||||
|
||||
y = .299*r + .587*g + .114*b;
|
||||
u = -.14713*r + -.28886*g + .436*b;
|
||||
v = .615*r + -.51499*g + -.10001*b;
|
||||
|
||||
set_pixel(im, i, j, 0, y);
|
||||
set_pixel(im, i, j, 1, u);
|
||||
set_pixel(im, i, j, 2, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// http://www.cs.rit.edu/~ncs/color/t_convert.html
|
||||
void rgb_to_hsv(image im)
|
||||
{
|
||||
@ -903,12 +991,30 @@ void hsv_to_rgb(image im)
|
||||
}
|
||||
}
|
||||
|
||||
void grayscale_image_3c(image im)
|
||||
{
|
||||
assert(im.c == 3);
|
||||
int i, j, k;
|
||||
float scale[] = {0.299, 0.587, 0.114};
|
||||
for(j = 0; j < im.h; ++j){
|
||||
for(i = 0; i < im.w; ++i){
|
||||
float val = 0;
|
||||
for(k = 0; k < 3; ++k){
|
||||
val += scale[k]*get_pixel(im, i, j, k);
|
||||
}
|
||||
im.data[0*im.h*im.w + im.w*j + i] = val;
|
||||
im.data[1*im.h*im.w + im.w*j + i] = val;
|
||||
im.data[2*im.h*im.w + im.w*j + i] = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
image grayscale_image(image im)
|
||||
{
|
||||
assert(im.c == 3);
|
||||
int i, j, k;
|
||||
image gray = make_image(im.w, im.h, 1);
|
||||
float scale[] = {0.587, 0.299, 0.114};
|
||||
float scale[] = {0.299, 0.587, 0.114};
|
||||
for(k = 0; k < im.c; ++k){
|
||||
for(j = 0; j < im.h; ++j){
|
||||
for(i = 0; i < im.w; ++i){
|
||||
|
@ -29,25 +29,32 @@ image crop_image(image im, int dx, int dy, int w, int h);
|
||||
image random_crop_image(image im, int w, int h);
|
||||
image random_augment_image(image im, float angle, float aspect, int low, int high, int size);
|
||||
void random_distort_image(image im, float hue, float saturation, float exposure);
|
||||
image letterbox_image(image im, int w, int h);
|
||||
image resize_image(image im, int w, int h);
|
||||
image resize_min(image im, int min);
|
||||
image resize_max(image im, int max);
|
||||
void fill_image(image m, float s);
|
||||
void translate_image(image m, float s);
|
||||
void normalize_image(image p);
|
||||
image rotate_image(image m, float rad);
|
||||
void rotate_image_cw(image im, int times);
|
||||
void embed_image(image source, image dest, int dx, int dy);
|
||||
void place_image(image im, int w, int h, int dx, int dy, image canvas);
|
||||
void saturate_image(image im, float sat);
|
||||
void exposure_image(image im, float sat);
|
||||
void distort_image(image im, float hue, float sat, float val);
|
||||
void saturate_exposure_image(image im, float sat, float exposure);
|
||||
void rgb_to_hsv(image im);
|
||||
void hsv_to_rgb(image im);
|
||||
void yuv_to_rgb(image im);
|
||||
void rgb_to_yuv(image im);
|
||||
void rgbgr_image(image im);
|
||||
void constrain_image(image im);
|
||||
void composite_3d(char *f1, char *f2, char *out, int delta);
|
||||
int best_3d_shift_r(image a, image b, int min, int max);
|
||||
|
||||
image grayscale_image(image im);
|
||||
void grayscale_image_3c(image im);
|
||||
image threshold_image(image im, float thresh);
|
||||
|
||||
image collapse_image_layers(image source, int border);
|
||||
|
@ -32,7 +32,6 @@ void free_layer(layer l)
|
||||
if(l.scale_updates) free(l.scale_updates);
|
||||
if(l.weights) free(l.weights);
|
||||
if(l.weight_updates) free(l.weight_updates);
|
||||
if(l.col_image) free(l.col_image);
|
||||
if(l.delta) free(l.delta);
|
||||
if(l.output) free(l.output);
|
||||
if(l.squared) free(l.squared);
|
||||
@ -80,7 +79,6 @@ void free_layer(layer l)
|
||||
if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu);
|
||||
if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu);
|
||||
if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu);
|
||||
if(l.col_image_gpu) cuda_free(l.col_image_gpu);
|
||||
if(l.x_gpu) cuda_free(l.x_gpu);
|
||||
if(l.x_norm_gpu) cuda_free(l.x_norm_gpu);
|
||||
if(l.weights_gpu) cuda_free(l.weights_gpu);
|
||||
|
13
src/layer.h
13
src/layer.h
@ -38,7 +38,7 @@ typedef enum {
|
||||
} LAYER_TYPE;
|
||||
|
||||
typedef enum{
|
||||
SSE, MASKED, SMOOTH
|
||||
SSE, MASKED, L1, SMOOTH
|
||||
} COST_TYPE;
|
||||
|
||||
struct layer{
|
||||
@ -58,6 +58,7 @@ struct layer{
|
||||
int flipped;
|
||||
int inputs;
|
||||
int outputs;
|
||||
int extra;
|
||||
int truths;
|
||||
int h,w,c;
|
||||
int out_h, out_w, out_c;
|
||||
@ -68,6 +69,7 @@ struct layer{
|
||||
int side;
|
||||
int stride;
|
||||
int reverse;
|
||||
int flatten;
|
||||
int pad;
|
||||
int sqrt;
|
||||
int flip;
|
||||
@ -76,6 +78,8 @@ struct layer{
|
||||
int xnor;
|
||||
int steps;
|
||||
int hidden;
|
||||
int truth;
|
||||
float smooth;
|
||||
float dot;
|
||||
float angle;
|
||||
float jitter;
|
||||
@ -83,6 +87,7 @@ struct layer{
|
||||
float exposure;
|
||||
float shift;
|
||||
float ratio;
|
||||
float learning_rate_scale;
|
||||
int softmax;
|
||||
int classes;
|
||||
int coords;
|
||||
@ -115,6 +120,8 @@ struct layer{
|
||||
int classfix;
|
||||
int absolute;
|
||||
|
||||
int onlyforward;
|
||||
int stopbackward;
|
||||
int dontload;
|
||||
int dontloadscales;
|
||||
|
||||
@ -149,7 +156,6 @@ struct layer{
|
||||
float * weights;
|
||||
float * weight_updates;
|
||||
|
||||
float * col_image;
|
||||
float * delta;
|
||||
float * output;
|
||||
float * squared;
|
||||
@ -235,8 +241,6 @@ struct layer{
|
||||
float * variance_delta_gpu;
|
||||
float * mean_delta_gpu;
|
||||
|
||||
float * col_image_gpu;
|
||||
|
||||
float * x_gpu;
|
||||
float * x_norm_gpu;
|
||||
float * weights_gpu;
|
||||
@ -256,6 +260,7 @@ struct layer{
|
||||
#ifdef CUDNN
|
||||
cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc;
|
||||
cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc;
|
||||
cudnnTensorDescriptor_t normTensorDesc;
|
||||
cudnnFilterDescriptor_t weightDesc;
|
||||
cudnnFilterDescriptor_t dweightDesc;
|
||||
cudnnConvolutionDescriptor_t convDesc;
|
||||
|
@ -57,9 +57,10 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
|
||||
float scale = sqrt(2./(size*size*c));
|
||||
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);
|
||||
|
||||
l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
|
||||
l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));
|
||||
|
||||
l.workspace_size = out_h*out_w*size*size*c;
|
||||
|
||||
l.forward = forward_local_layer;
|
||||
l.backward = backward_local_layer;
|
||||
@ -76,7 +77,6 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
|
||||
l.biases_gpu = cuda_make_array(l.biases, l.outputs);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs);
|
||||
|
||||
l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c);
|
||||
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
|
||||
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
|
||||
@ -102,11 +102,11 @@ void forward_local_layer(const local_layer l, network_state state)
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
float *input = state.input + i*l.w*l.h*l.c;
|
||||
im2col_cpu(input, l.c, l.h, l.w,
|
||||
l.size, l.stride, l.pad, l.col_image);
|
||||
l.size, l.stride, l.pad, state.workspace);
|
||||
float *output = l.output + i*l.outputs;
|
||||
for(j = 0; j < locations; ++j){
|
||||
float *a = l.weights + j*l.size*l.size*l.c*l.n;
|
||||
float *b = l.col_image + j;
|
||||
float *b = state.workspace + j;
|
||||
float *c = output + j;
|
||||
|
||||
int m = l.n;
|
||||
@ -133,11 +133,11 @@ void backward_local_layer(local_layer l, network_state state)
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
float *input = state.input + i*l.w*l.h*l.c;
|
||||
im2col_cpu(input, l.c, l.h, l.w,
|
||||
l.size, l.stride, l.pad, l.col_image);
|
||||
l.size, l.stride, l.pad, state.workspace);
|
||||
|
||||
for(j = 0; j < locations; ++j){
|
||||
float *a = l.delta + i*l.outputs + j;
|
||||
float *b = l.col_image + j;
|
||||
float *b = state.workspace + j;
|
||||
float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
|
||||
int m = l.n;
|
||||
int n = l.size*l.size*l.c;
|
||||
@ -150,7 +150,7 @@ void backward_local_layer(local_layer l, network_state state)
|
||||
for(j = 0; j < locations; ++j){
|
||||
float *a = l.weights + j*l.size*l.size*l.c*l.n;
|
||||
float *b = l.delta + i*l.outputs + j;
|
||||
float *c = l.col_image + j;
|
||||
float *c = state.workspace + j;
|
||||
|
||||
int m = l.size*l.size*l.c;
|
||||
int n = 1;
|
||||
@ -159,7 +159,7 @@ void backward_local_layer(local_layer l, network_state state)
|
||||
gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
|
||||
}
|
||||
|
||||
col2im_cpu(l.col_image, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
|
||||
col2im_cpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -192,11 +192,11 @@ void forward_local_layer_gpu(const local_layer l, network_state state)
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
float *input = state.input + i*l.w*l.h*l.c;
|
||||
im2col_ongpu(input, l.c, l.h, l.w,
|
||||
l.size, l.stride, l.pad, l.col_image_gpu);
|
||||
l.size, l.stride, l.pad, state.workspace);
|
||||
float *output = l.output_gpu + i*l.outputs;
|
||||
for(j = 0; j < locations; ++j){
|
||||
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
|
||||
float *b = l.col_image_gpu + j;
|
||||
float *b = state.workspace + j;
|
||||
float *c = output + j;
|
||||
|
||||
int m = l.n;
|
||||
@ -222,11 +222,11 @@ void backward_local_layer_gpu(local_layer l, network_state state)
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
float *input = state.input + i*l.w*l.h*l.c;
|
||||
im2col_ongpu(input, l.c, l.h, l.w,
|
||||
l.size, l.stride, l.pad, l.col_image_gpu);
|
||||
l.size, l.stride, l.pad, state.workspace);
|
||||
|
||||
for(j = 0; j < locations; ++j){
|
||||
float *a = l.delta_gpu + i*l.outputs + j;
|
||||
float *b = l.col_image_gpu + j;
|
||||
float *b = state.workspace + j;
|
||||
float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n;
|
||||
int m = l.n;
|
||||
int n = l.size*l.size*l.c;
|
||||
@ -239,7 +239,7 @@ void backward_local_layer_gpu(local_layer l, network_state state)
|
||||
for(j = 0; j < locations; ++j){
|
||||
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
|
||||
float *b = l.delta_gpu + i*l.outputs + j;
|
||||
float *c = l.col_image_gpu + j;
|
||||
float *c = state.workspace + j;
|
||||
|
||||
int m = l.size*l.size*l.c;
|
||||
int n = 1;
|
||||
@ -248,7 +248,7 @@ void backward_local_layer_gpu(local_layer l, network_state state)
|
||||
gemm_ongpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
|
||||
}
|
||||
|
||||
col2im_ongpu(l.col_image_gpu, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
|
||||
col2im_ongpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
924
src/lsd.c
Normal file
924
src/lsd.c
Normal file
@ -0,0 +1,924 @@
|
||||
#include "network.h"
|
||||
#include "cost_layer.h"
|
||||
#include "utils.h"
|
||||
#include "parser.h"
|
||||
#include "blas.h"
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#endif
|
||||
|
||||
void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear)
|
||||
{
|
||||
#ifdef GPU
|
||||
//char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
|
||||
char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
|
||||
//char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
|
||||
char *style_images = "/home/pjreddie/zelda.txt";
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
srand(time(0));
|
||||
network fnet = load_network(fcfg, fweight, clear);
|
||||
network gnet = load_network(gcfg, gweight, clear);
|
||||
network anet = load_network(acfg, aweight, clear);
|
||||
char *gbase = basecfg(gcfg);
|
||||
char *abase = basecfg(acfg);
|
||||
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet.learning_rate, gnet.momentum, gnet.decay);
|
||||
int imgs = gnet.batch*gnet.subdivisions;
|
||||
int i = *gnet.seen/imgs;
|
||||
data train, tbuffer;
|
||||
data style, sbuffer;
|
||||
|
||||
|
||||
list *slist = get_paths(style_images);
|
||||
char **spaths = (char **)list_to_array(slist);
|
||||
|
||||
list *tlist = get_paths(train_images);
|
||||
char **tpaths = (char **)list_to_array(tlist);
|
||||
|
||||
load_args targs= get_base_args(gnet);
|
||||
targs.paths = tpaths;
|
||||
targs.n = imgs;
|
||||
targs.m = tlist->size;
|
||||
targs.d = &tbuffer;
|
||||
targs.type = CLASSIFICATION_DATA;
|
||||
targs.classes = 1;
|
||||
char *ls[1] = {"zelda"};
|
||||
targs.labels = ls;
|
||||
|
||||
load_args sargs = get_base_args(gnet);
|
||||
sargs.paths = spaths;
|
||||
sargs.n = imgs;
|
||||
sargs.m = slist->size;
|
||||
sargs.d = &sbuffer;
|
||||
sargs.type = CLASSIFICATION_DATA;
|
||||
sargs.classes = 1;
|
||||
sargs.labels = ls;
|
||||
|
||||
pthread_t tload_thread = load_data_in_thread(targs);
|
||||
pthread_t sload_thread = load_data_in_thread(sargs);
|
||||
clock_t time;
|
||||
|
||||
float aloss_avg = -1;
|
||||
float floss_avg = -1;
|
||||
|
||||
network_state fstate = {0};
|
||||
fstate.index = 0;
|
||||
fstate.net = fnet;
|
||||
int x_size = get_network_input_size(fnet)*fnet.batch;
|
||||
int y_size = get_network_output_size(fnet)*fnet.batch;
|
||||
fstate.input = cuda_make_array(0, x_size);
|
||||
fstate.truth = cuda_make_array(0, y_size);
|
||||
fstate.delta = cuda_make_array(0, x_size);
|
||||
fstate.train = 1;
|
||||
float *X = calloc(x_size, sizeof(float));
|
||||
float *y = calloc(y_size, sizeof(float));
|
||||
|
||||
float *ones = cuda_make_array(0, anet.batch);
|
||||
float *zeros = cuda_make_array(0, anet.batch);
|
||||
fill_ongpu(anet.batch, .99, ones, 1);
|
||||
fill_ongpu(anet.batch, .01, zeros, 1);
|
||||
|
||||
network_state astate = {0};
|
||||
astate.index = 0;
|
||||
astate.net = anet;
|
||||
int ax_size = get_network_input_size(anet)*anet.batch;
|
||||
int ay_size = get_network_output_size(anet)*anet.batch;
|
||||
astate.input = 0;
|
||||
astate.truth = ones;
|
||||
astate.delta = cuda_make_array(0, ax_size);
|
||||
astate.train = 1;
|
||||
|
||||
network_state gstate = {0};
|
||||
gstate.index = 0;
|
||||
gstate.net = gnet;
|
||||
int gx_size = get_network_input_size(gnet)*gnet.batch;
|
||||
int gy_size = get_network_output_size(gnet)*gnet.batch;
|
||||
gstate.input = cuda_make_array(0, gx_size);
|
||||
gstate.truth = 0;
|
||||
gstate.delta = 0;
|
||||
gstate.train = 1;
|
||||
|
||||
while (get_current_batch(gnet) < gnet.max_batches) {
|
||||
i += 1;
|
||||
time=clock();
|
||||
pthread_join(tload_thread, 0);
|
||||
pthread_join(sload_thread, 0);
|
||||
train = tbuffer;
|
||||
style = sbuffer;
|
||||
tload_thread = load_data_in_thread(targs);
|
||||
sload_thread = load_data_in_thread(sargs);
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
|
||||
data generated = copy_data(train);
|
||||
time=clock();
|
||||
|
||||
int j, k;
|
||||
float floss = 0;
|
||||
for(j = 0; j < fnet.subdivisions; ++j){
|
||||
layer imlayer = gnet.layers[gnet.n - 1];
|
||||
get_next_batch(train, fnet.batch, j*fnet.batch, X, y);
|
||||
|
||||
cuda_push_array(fstate.input, X, x_size);
|
||||
cuda_push_array(gstate.input, X, gx_size);
|
||||
*gnet.seen += gnet.batch;
|
||||
|
||||
forward_network_gpu(fnet, fstate);
|
||||
float *feats = fnet.layers[fnet.n - 2].output_gpu;
|
||||
copy_ongpu(y_size, feats, 1, fstate.truth, 1);
|
||||
|
||||
forward_network_gpu(gnet, gstate);
|
||||
float *gen = gnet.layers[gnet.n-1].output_gpu;
|
||||
copy_ongpu(x_size, gen, 1, fstate.input, 1);
|
||||
|
||||
fill_ongpu(x_size, 0, fstate.delta, 1);
|
||||
forward_network_gpu(fnet, fstate);
|
||||
backward_network_gpu(fnet, fstate);
|
||||
//HERE
|
||||
|
||||
astate.input = gen;
|
||||
fill_ongpu(ax_size, 0, astate.delta, 1);
|
||||
forward_network_gpu(anet, astate);
|
||||
backward_network_gpu(anet, astate);
|
||||
|
||||
float *delta = imlayer.delta_gpu;
|
||||
fill_ongpu(x_size, 0, delta, 1);
|
||||
scal_ongpu(x_size, 100, astate.delta, 1);
|
||||
scal_ongpu(x_size, .00001, fstate.delta, 1);
|
||||
axpy_ongpu(x_size, 1, fstate.delta, 1, delta, 1);
|
||||
axpy_ongpu(x_size, 1, astate.delta, 1, delta, 1);
|
||||
|
||||
//fill_ongpu(x_size, 0, delta, 1);
|
||||
//cuda_push_array(delta, X, x_size);
|
||||
//axpy_ongpu(x_size, -1, imlayer.output_gpu, 1, delta, 1);
|
||||
//printf("pix error: %f\n", cuda_mag_array(delta, x_size));
|
||||
printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size));
|
||||
printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size));
|
||||
//axpy_ongpu(x_size, 1, astate.delta, 1, delta, 1);
|
||||
|
||||
backward_network_gpu(gnet, gstate);
|
||||
|
||||
floss += get_network_cost(fnet) /(fnet.subdivisions*fnet.batch);
|
||||
|
||||
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
|
||||
for(k = 0; k < gnet.batch; ++k){
|
||||
int index = j*gnet.batch + k;
|
||||
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1);
|
||||
generated.y.vals[index][0] = .01;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
image sim = float_to_image(anet.w, anet.h, anet.c, style.X.vals[j]);
|
||||
show_image(sim, "style");
|
||||
cvWaitKey(0);
|
||||
*/
|
||||
|
||||
harmless_update_network_gpu(anet);
|
||||
|
||||
data merge = concat_data(style, generated);
|
||||
randomize_data(merge);
|
||||
float aloss = train_network(anet, merge);
|
||||
|
||||
update_network_gpu(gnet);
|
||||
|
||||
free_data(merge);
|
||||
free_data(train);
|
||||
free_data(generated);
|
||||
free_data(style);
|
||||
if (aloss_avg < 0) aloss_avg = aloss;
|
||||
if (floss_avg < 0) floss_avg = floss;
|
||||
aloss_avg = aloss_avg*.9 + aloss*.1;
|
||||
floss_avg = floss_avg*.9 + floss*.1;
|
||||
|
||||
printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs);
|
||||
if(i%1000==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i);
|
||||
save_weights(gnet, buff);
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
|
||||
save_weights(anet, buff);
|
||||
}
|
||||
if(i%100==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, gbase);
|
||||
save_weights(gnet, buff);
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, abase);
|
||||
save_weights(anet, buff);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear)
|
||||
{
|
||||
#ifdef GPU
|
||||
//char *train_images = "/home/pjreddie/data/coco/train1.txt";
|
||||
//char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
|
||||
char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
srand(time(0));
|
||||
char *base = basecfg(cfg);
|
||||
char *abase = basecfg(acfg);
|
||||
printf("%s\n", base);
|
||||
network net = load_network(cfg, weight, clear);
|
||||
network anet = load_network(acfg, aweight, clear);
|
||||
|
||||
int i, j, k;
|
||||
layer imlayer = {0};
|
||||
for (i = 0; i < net.n; ++i) {
|
||||
if (net.layers[i].out_c == 3) {
|
||||
imlayer = net.layers[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = net.batch*net.subdivisions;
|
||||
i = *net.seen/imgs;
|
||||
data train, buffer;
|
||||
|
||||
|
||||
list *plist = get_paths(train_images);
|
||||
//int N = plist->size;
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.paths = paths;
|
||||
args.n = imgs;
|
||||
args.m = plist->size;
|
||||
args.d = &buffer;
|
||||
|
||||
args.min = net.min_crop;
|
||||
args.max = net.max_crop;
|
||||
args.angle = net.angle;
|
||||
args.aspect = net.aspect;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
args.size = net.w;
|
||||
args.type = CLASSIFICATION_DATA;
|
||||
args.classes = 1;
|
||||
char *ls[1] = {"coco"};
|
||||
args.labels = ls;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
|
||||
network_state gstate = {0};
|
||||
gstate.index = 0;
|
||||
gstate.net = net;
|
||||
int x_size = get_network_input_size(net)*net.batch;
|
||||
int y_size = x_size;
|
||||
gstate.input = cuda_make_array(0, x_size);
|
||||
gstate.truth = cuda_make_array(0, y_size);
|
||||
gstate.delta = 0;
|
||||
gstate.train = 1;
|
||||
float *pixs = calloc(x_size, sizeof(float));
|
||||
float *graypixs = calloc(x_size, sizeof(float));
|
||||
float *y = calloc(y_size, sizeof(float));
|
||||
|
||||
network_state astate = {0};
|
||||
astate.index = 0;
|
||||
astate.net = anet;
|
||||
int ay_size = get_network_output_size(anet)*anet.batch;
|
||||
astate.input = 0;
|
||||
astate.truth = 0;
|
||||
astate.delta = 0;
|
||||
astate.train = 1;
|
||||
|
||||
float *imerror = cuda_make_array(0, imlayer.outputs);
|
||||
float *ones_gpu = cuda_make_array(0, ay_size);
|
||||
fill_ongpu(ay_size, .9, ones_gpu, 1);
|
||||
|
||||
float aloss_avg = -1;
|
||||
float gloss_avg = -1;
|
||||
|
||||
//data generated = copy_data(train);
|
||||
|
||||
while (get_current_batch(net) < net.max_batches) {
|
||||
i += 1;
|
||||
time=clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data_in_thread(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
|
||||
data gray = copy_data(train);
|
||||
for(j = 0; j < imgs; ++j){
|
||||
image gim = float_to_image(net.w, net.h, net.c, gray.X.vals[j]);
|
||||
grayscale_image_3c(gim);
|
||||
train.y.vals[j][0] = .9;
|
||||
|
||||
image yim = float_to_image(net.w, net.h, net.c, train.X.vals[j]);
|
||||
//rgb_to_yuv(yim);
|
||||
}
|
||||
time=clock();
|
||||
float gloss = 0;
|
||||
|
||||
for(j = 0; j < net.subdivisions; ++j){
|
||||
get_next_batch(train, net.batch, j*net.batch, pixs, y);
|
||||
get_next_batch(gray, net.batch, j*net.batch, graypixs, y);
|
||||
cuda_push_array(gstate.input, graypixs, x_size);
|
||||
cuda_push_array(gstate.truth, pixs, x_size);
|
||||
/*
|
||||
image origi = float_to_image(net.w, net.h, 3, pixs);
|
||||
image grayi = float_to_image(net.w, net.h, 3, graypixs);
|
||||
show_image(grayi, "gray");
|
||||
show_image(origi, "orig");
|
||||
cvWaitKey(0);
|
||||
*/
|
||||
*net.seen += net.batch;
|
||||
forward_network_gpu(net, gstate);
|
||||
|
||||
fill_ongpu(imlayer.outputs, 0, imerror, 1);
|
||||
astate.input = imlayer.output_gpu;
|
||||
astate.delta = imerror;
|
||||
astate.truth = ones_gpu;
|
||||
forward_network_gpu(anet, astate);
|
||||
backward_network_gpu(anet, astate);
|
||||
|
||||
scal_ongpu(imlayer.outputs, .1, net.layers[net.n-1].delta_gpu, 1);
|
||||
|
||||
backward_network_gpu(net, gstate);
|
||||
|
||||
scal_ongpu(imlayer.outputs, 100, imerror, 1);
|
||||
|
||||
printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs));
|
||||
printf("features %f\n", cuda_mag_array(net.layers[net.n-1].delta_gpu, imlayer.outputs));
|
||||
|
||||
axpy_ongpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1);
|
||||
|
||||
gloss += get_network_cost(net) /(net.subdivisions*net.batch);
|
||||
|
||||
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
|
||||
for(k = 0; k < net.batch; ++k){
|
||||
int index = j*net.batch + k;
|
||||
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
|
||||
gray.y.vals[index][0] = .1;
|
||||
}
|
||||
}
|
||||
harmless_update_network_gpu(anet);
|
||||
|
||||
data merge = concat_data(train, gray);
|
||||
randomize_data(merge);
|
||||
float aloss = train_network(anet, merge);
|
||||
|
||||
update_network_gpu(net);
|
||||
update_network_gpu(anet);
|
||||
free_data(merge);
|
||||
free_data(train);
|
||||
free_data(gray);
|
||||
if (aloss_avg < 0) aloss_avg = aloss;
|
||||
aloss_avg = aloss_avg*.9 + aloss*.1;
|
||||
gloss_avg = gloss_avg*.9 + gloss*.1;
|
||||
|
||||
printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs);
|
||||
if(i%1000==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
|
||||
save_weights(net, buff);
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
|
||||
save_weights(anet, buff);
|
||||
}
|
||||
if(i%100==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, abase);
|
||||
save_weights(anet, buff);
|
||||
}
|
||||
}
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
#endif
|
||||
}
|
||||
|
||||
void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear)
|
||||
{
|
||||
#ifdef GPU
|
||||
//char *train_images = "/home/pjreddie/data/coco/train1.txt";
|
||||
//char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
|
||||
char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
srand(time(0));
|
||||
char *base = basecfg(cfg);
|
||||
char *abase = basecfg(acfg);
|
||||
printf("%s\n", base);
|
||||
network net = load_network(cfg, weight, clear);
|
||||
network anet = load_network(acfg, aweight, clear);
|
||||
|
||||
int i, j, k;
|
||||
layer imlayer = {0};
|
||||
for (i = 0; i < net.n; ++i) {
|
||||
if (net.layers[i].out_c == 3) {
|
||||
imlayer = net.layers[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = net.batch*net.subdivisions;
|
||||
i = *net.seen/imgs;
|
||||
data train, buffer;
|
||||
|
||||
|
||||
list *plist = get_paths(train_images);
|
||||
//int N = plist->size;
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.paths = paths;
|
||||
args.n = imgs;
|
||||
args.m = plist->size;
|
||||
args.d = &buffer;
|
||||
|
||||
args.min = net.min_crop;
|
||||
args.max = net.max_crop;
|
||||
args.angle = net.angle;
|
||||
args.aspect = net.aspect;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
args.size = net.w;
|
||||
args.type = CLASSIFICATION_DATA;
|
||||
args.classes = 1;
|
||||
char *ls[1] = {"imagenet"};
|
||||
args.labels = ls;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
|
||||
network_state gstate = {0};
|
||||
gstate.index = 0;
|
||||
gstate.net = net;
|
||||
int x_size = get_network_input_size(net)*net.batch;
|
||||
int y_size = x_size;
|
||||
gstate.input = cuda_make_array(0, x_size);
|
||||
gstate.truth = cuda_make_array(0, y_size);
|
||||
gstate.delta = 0;
|
||||
gstate.train = 1;
|
||||
float *pixs = calloc(x_size, sizeof(float));
|
||||
float *graypixs = calloc(x_size, sizeof(float));
|
||||
float *y = calloc(y_size, sizeof(float));
|
||||
|
||||
network_state astate = {0};
|
||||
astate.index = 0;
|
||||
astate.net = anet;
|
||||
int ay_size = get_network_output_size(anet)*anet.batch;
|
||||
astate.input = 0;
|
||||
astate.truth = 0;
|
||||
astate.delta = 0;
|
||||
astate.train = 1;
|
||||
|
||||
float *imerror = cuda_make_array(0, imlayer.outputs);
|
||||
float *ones_gpu = cuda_make_array(0, ay_size);
|
||||
fill_ongpu(ay_size, .99, ones_gpu, 1);
|
||||
|
||||
float aloss_avg = -1;
|
||||
float gloss_avg = -1;
|
||||
|
||||
//data generated = copy_data(train);
|
||||
|
||||
while (get_current_batch(net) < net.max_batches) {
|
||||
i += 1;
|
||||
time=clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data_in_thread(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
|
||||
data gray = copy_data(train);
|
||||
for(j = 0; j < imgs; ++j){
|
||||
image gim = float_to_image(net.w, net.h, net.c, gray.X.vals[j]);
|
||||
grayscale_image_3c(gim);
|
||||
train.y.vals[j][0] = .99;
|
||||
|
||||
image yim = float_to_image(net.w, net.h, net.c, train.X.vals[j]);
|
||||
//rgb_to_yuv(yim);
|
||||
}
|
||||
time=clock();
|
||||
float gloss = 0;
|
||||
|
||||
for(j = 0; j < net.subdivisions; ++j){
|
||||
get_next_batch(train, net.batch, j*net.batch, pixs, y);
|
||||
get_next_batch(gray, net.batch, j*net.batch, graypixs, y);
|
||||
cuda_push_array(gstate.input, graypixs, x_size);
|
||||
cuda_push_array(gstate.truth, pixs, x_size);
|
||||
/*
|
||||
image origi = float_to_image(net.w, net.h, 3, pixs);
|
||||
image grayi = float_to_image(net.w, net.h, 3, graypixs);
|
||||
show_image(grayi, "gray");
|
||||
show_image(origi, "orig");
|
||||
cvWaitKey(0);
|
||||
*/
|
||||
*net.seen += net.batch;
|
||||
forward_network_gpu(net, gstate);
|
||||
|
||||
fill_ongpu(imlayer.outputs, 0, imerror, 1);
|
||||
astate.input = imlayer.output_gpu;
|
||||
astate.delta = imerror;
|
||||
astate.truth = ones_gpu;
|
||||
forward_network_gpu(anet, astate);
|
||||
backward_network_gpu(anet, astate);
|
||||
|
||||
scal_ongpu(imlayer.outputs, .1, net.layers[net.n-1].delta_gpu, 1);
|
||||
|
||||
backward_network_gpu(net, gstate);
|
||||
|
||||
scal_ongpu(imlayer.outputs, 100, imerror, 1);
|
||||
|
||||
printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs));
|
||||
printf("features %f\n", cuda_mag_array(net.layers[net.n-1].delta_gpu, imlayer.outputs));
|
||||
|
||||
axpy_ongpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1);
|
||||
|
||||
gloss += get_network_cost(net) /(net.subdivisions*net.batch);
|
||||
|
||||
cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
|
||||
for(k = 0; k < net.batch; ++k){
|
||||
int index = j*net.batch + k;
|
||||
copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
|
||||
gray.y.vals[index][0] = .01;
|
||||
}
|
||||
}
|
||||
harmless_update_network_gpu(anet);
|
||||
|
||||
data merge = concat_data(train, gray);
|
||||
randomize_data(merge);
|
||||
float aloss = train_network(anet, merge);
|
||||
|
||||
update_network_gpu(net);
|
||||
update_network_gpu(anet);
|
||||
free_data(merge);
|
||||
free_data(train);
|
||||
free_data(gray);
|
||||
if (aloss_avg < 0) aloss_avg = aloss;
|
||||
aloss_avg = aloss_avg*.9 + aloss*.1;
|
||||
gloss_avg = gloss_avg*.9 + gloss*.1;
|
||||
|
||||
printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs);
|
||||
if(i%1000==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
|
||||
save_weights(net, buff);
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
|
||||
save_weights(anet, buff);
|
||||
}
|
||||
if(i%100==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, abase);
|
||||
save_weights(anet, buff);
|
||||
}
|
||||
}
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
** Adversarial (GAN-style) training driver.  The generator `net` (from
** cfgfile/weightfile) is trained to produce images that fool the
** discriminator `anet` (from acfgfile/aweightfile), which is in turn trained
** on a mix of real images and the generator's outputs.  `clear` resets the
** seen-image counters of both networks.  Compiled only with GPU support.
*/
void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear)
{
#ifdef GPU
    char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
    char *backup_directory = "/home/pjreddie/backup/";
    srand(time(0));

    /* Generator network. */
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    if(clear) *net.seen = 0;

    /* Adversary (discriminator) network. */
    char *abase = basecfg(acfgfile);
    network anet = parse_network_cfg(acfgfile);
    if(aweightfile){
        load_weights(&anet, aweightfile);
    }
    if(clear) *anet.seen = 0;

    /* The first layer with a 3-channel output is taken to be the generator's
       image-producing layer.  NOTE(review): if no such layer exists, imlayer
       stays zeroed and the GPU calls below will misbehave — confirm the cfg
       always contains one. */
    int i, j, k;
    layer imlayer = {0};
    for (i = 0; i < net.n; ++i) {
        if (net.layers[i].out_c == 3) {
            imlayer = net.layers[i];
            break;
        }
    }

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    int imgs = net.batch*net.subdivisions;
    i = *net.seen/imgs;
    data train, buffer;

    list *plist = get_paths(train_images);
    char **paths = (char **)list_to_array(plist);

    /* Data-loader configuration: single dummy class, classification-style
       loading of the training images. */
    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;

    args.min = net.min_crop;
    args.max = net.max_crop;
    args.angle = net.angle;
    args.aspect = net.aspect;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;
    args.size = net.w;
    args.type = CLASSIFICATION_DATA;
    args.classes = 1;
    char *ls[1] = {"coco"};
    args.labels = ls;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

    /* Generator state: input is pushed manually each sub-batch. */
    network_state gstate = {0};
    gstate.index = 0;
    gstate.net = net;
    int x_size = get_network_input_size(net)*net.batch;
    int y_size = 1*net.batch;
    gstate.input = cuda_make_array(0, x_size);
    gstate.truth = 0;
    gstate.delta = 0;
    gstate.train = 1;
    float *X = calloc(x_size, sizeof(float));
    float *y = calloc(y_size, sizeof(float));

    /* Adversary state: its input/delta/truth are wired per sub-batch to the
       generator's image layer, the adversarial error buffer, and all-ones
       "real" labels respectively. */
    network_state astate = {0};
    astate.index = 0;
    astate.net = anet;
    int ay_size = get_network_output_size(anet)*anet.batch;
    astate.input = 0;
    astate.truth = 0;
    astate.delta = 0;
    astate.train = 1;

    /* NOTE(review): imerror is sized for a single image (imlayer.outputs),
       not imlayer.outputs*batch — this matches the original code; confirm
       the adversary runs with batch 1 relative to this buffer. */
    float *imerror = cuda_make_array(0, imlayer.outputs);
    float *ones_gpu = cuda_make_array(0, ay_size);
    fill_ongpu(ay_size, 1, ones_gpu, 1);

    float aloss_avg = -1;
    float gloss_avg = -1;

    while (get_current_batch(net) < net.max_batches) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        /* `generated` will be overwritten with the generator's outputs and
           labeled "fake" (0) for discriminator training. */
        data generated = copy_data(train);
        time=clock();
        float gloss = 0;

        for(j = 0; j < net.subdivisions; ++j){
            get_next_batch(train, net.batch, j*net.batch, X, y);
            cuda_push_array(gstate.input, X, x_size);
            *net.seen += net.batch;
            forward_network_gpu(net, gstate);

            /* Adversarial pass: ask the discriminator how "real" the
               generated image is, backprop against all-ones truth, and
               accumulate its input gradient into the generator's image
               layer delta. */
            fill_ongpu(imlayer.outputs, 0, imerror, 1);
            astate.input = imlayer.output_gpu;
            astate.delta = imerror;
            astate.truth = ones_gpu;
            forward_network_gpu(anet, astate);
            backward_network_gpu(anet, astate);

            scal_ongpu(imlayer.outputs, 1, imerror, 1); /* weighting knob; 1 = no-op */
            axpy_ongpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1);

            backward_network_gpu(net, gstate);

            printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs));
            printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs));

            gloss += get_network_cost(net) /(net.subdivisions*net.batch);

            /* NOTE(review): pulls x_size floats — assumes the image layer's
               output is the same size as the network input (autoencoder);
               confirm against the cfg. */
            cuda_pull_array(imlayer.output_gpu, imlayer.output, x_size);
            for(k = 0; k < net.batch; ++k){
                int index = j*net.batch + k;
                copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1);
                generated.y.vals[index][0] = 0;
            }
        }
        /* Discard the weight updates the discriminator accumulated while
           serving as the generator's critic. */
        harmless_update_network_gpu(anet);

        /* Train the discriminator on real + generated images. */
        data merge = concat_data(train, generated);
        randomize_data(merge);
        float aloss = train_network(anet, merge);

        update_network_gpu(net);
        update_network_gpu(anet);
        free_data(merge);
        free_data(train);
        free_data(generated);
        if (aloss_avg < 0) aloss_avg = aloss;
        aloss_avg = aloss_avg*.9 + aloss*.1;
        gloss_avg = gloss_avg*.9 + gloss*.1;

        printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs);
        if(i%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
            sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
            save_weights(anet, buff);
        }
        if(i%100==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
            sprintf(buff, "%s/%s.backup", backup_directory, abase);
            save_weights(anet, buff);
        }
    }
    /* FIX: reap the loader thread launched on the last iteration; it was
       previously abandoned, racing on `buffer` and leaking its data. */
    pthread_join(load_thread, 0);
    free_data(buffer);

    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);

    /* FIX: release host and device buffers that were leaked. */
    free(X);
    free(y);
    cuda_free(gstate.input);
    cuda_free(imerror);
    cuda_free(ones_gpu);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
    free(abase);
#endif
}
|
||||
|
||||
void train_lsd(char *cfgfile, char *weightfile, int clear)
|
||||
{
|
||||
char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
srand(time(0));
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
float avg_loss = -1;
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
if(clear) *net.seen = 0;
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = net.batch*net.subdivisions;
|
||||
int i = *net.seen/imgs;
|
||||
data train, buffer;
|
||||
|
||||
|
||||
list *plist = get_paths(train_images);
|
||||
//int N = plist->size;
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.paths = paths;
|
||||
args.n = imgs;
|
||||
args.m = plist->size;
|
||||
args.d = &buffer;
|
||||
|
||||
args.min = net.min_crop;
|
||||
args.max = net.max_crop;
|
||||
args.angle = net.angle;
|
||||
args.aspect = net.aspect;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
args.size = net.w;
|
||||
args.type = CLASSIFICATION_DATA;
|
||||
args.classes = 1;
|
||||
char *ls[1] = {"coco"};
|
||||
args.labels = ls;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
while(get_current_batch(net) < net.max_batches){
|
||||
i += 1;
|
||||
time=clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data_in_thread(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
|
||||
time=clock();
|
||||
float loss = train_network(net, train);
|
||||
if (avg_loss < 0) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
|
||||
printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
|
||||
if(i%1000==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(i%100==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
free_data(train);
|
||||
}
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
|
||||
void test_lsd(char *cfgfile, char *weightfile, char *filename)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
|
||||
clock_t time;
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
int i, imlayer = 0;
|
||||
|
||||
for (i = 0; i < net.n; ++i) {
|
||||
if (net.layers[i].out_c == 3) {
|
||||
imlayer = i;
|
||||
printf("%d\n", i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while(1){
|
||||
if(filename){
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input, 0, 0);
|
||||
image resized = resize_min(im, net.w);
|
||||
image crop = crop_image(resized, (resized.w - net.w)/2, (resized.h - net.h)/2, net.w, net.h);
|
||||
//grayscale_image_3c(crop);
|
||||
|
||||
float *X = crop.data;
|
||||
time=clock();
|
||||
network_predict(net, X);
|
||||
image out = get_network_image_layer(net, imlayer);
|
||||
//yuv_to_rgb(out);
|
||||
constrain_image(out);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
show_image(out, "out");
|
||||
show_image(crop, "crop");
|
||||
save_image(out, "out");
|
||||
#ifdef OPENCV
|
||||
cvWaitKey(0);
|
||||
#endif
|
||||
|
||||
free_image(im);
|
||||
free_image(resized);
|
||||
free_image(crop);
|
||||
if (filename) break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
** Command-line dispatcher for the `lsd` subcommand.
**   argv[2] = mode (train/train2/traincolor/train3/test)
**   argv[3] = cfg, argv[4] = weights (optional)
**   argv[5] = image filename (test) or adversary cfg (train2/traincolor)
**   argv[6] = adversary weights (optional)
*/
void run_lsd(int argc, char **argv)
{
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    int clear = find_arg(argc, argv, "-clear");

    char *cfg = argv[3];
    char *weights = (argc > 4) ? argv[4] : 0;
    char *filename = (argc > 5) ? argv[5] : 0;
    /* FIX: argv[5] was read unconditionally; when argc == 4 that indexes
       past the argv[argc] NULL terminator — undefined behavior. */
    char *acfg = (argc > 5) ? argv[5] : 0;
    char *aweights = (argc > 6) ? argv[6] : 0;
    if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear);
    else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear);
    else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear);
    else if(0==strcmp(argv[2], "train3")){
        /* FIX: train3 dereferences argv[3..8]; verify they exist first. */
        if(argc > 8) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear);
        else fprintf(stderr, "usage: %s %s train3 [cfg1] [weights1] [cfg2] [weights2] [cfg3] [weights3]\n", argv[0], argv[1]);
    }
    else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename);
    /*
    else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights);
    */
}
|
15
src/matrix.c
15
src/matrix.c
@ -1,5 +1,6 @@
|
||||
#include "matrix.h"
|
||||
#include "utils.h"
|
||||
#include "blas.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -73,6 +74,20 @@ void matrix_add_matrix(matrix from, matrix to)
|
||||
}
|
||||
}
|
||||
|
||||
/*
** Deep-copy a matrix: the returned matrix owns fresh row pointers and fresh
** row buffers, so freeing it does not touch `m`.
*/
matrix copy_matrix(matrix m)
{
    int r;
    matrix out = {0};
    out.rows = m.rows;
    out.cols = m.cols;
    out.vals = calloc(out.rows, sizeof(float *));
    for(r = 0; r < out.rows; ++r){
        out.vals[r] = calloc(out.cols, sizeof(float));
        copy_cpu(out.cols, m.vals[r], 1, out.vals[r], 1);
    }
    return out;
}
|
||||
|
||||
matrix make_matrix(int rows, int cols)
|
||||
{
|
||||
int i;
|
||||
|
@ -6,6 +6,7 @@ typedef struct matrix{
|
||||
} matrix;
|
||||
|
||||
matrix make_matrix(int rows, int cols);
|
||||
matrix copy_matrix(matrix m);
|
||||
void free_matrix(matrix m);
|
||||
void print_matrix(matrix m);
|
||||
|
||||
|
@ -27,6 +27,35 @@
|
||||
#include "dropout_layer.h"
|
||||
#include "route_layer.h"
|
||||
#include "shortcut_layer.h"
|
||||
#include "parser.h"
|
||||
#include "data.h"
|
||||
|
||||
/*
** Build a load_args pre-populated from the network: input geometry plus the
** data-augmentation ranges declared in the cfg.  Callers fill in paths,
** labels, counts, and type themselves.
*/
load_args get_base_args(network net)
{
    load_args a = {0};

    /* Input geometry. */
    a.w = net.w;
    a.h = net.h;
    a.size = net.w;

    /* Augmentation parameters from the cfg. */
    a.angle = net.angle;
    a.aspect = net.aspect;
    a.min = net.min_crop;
    a.max = net.max_crop;
    a.hue = net.hue;
    a.saturation = net.saturation;
    a.exposure = net.exposure;

    return a;
}
|
||||
|
||||
network load_network(char *cfg, char *weights, int clear)
|
||||
{
|
||||
network net = parse_network_cfg(cfg);
|
||||
if(weights && weights[0] != 0){
|
||||
load_weights(&net, weights);
|
||||
}
|
||||
if(clear) *net.seen = 0;
|
||||
return net;
|
||||
}
|
||||
|
||||
int get_current_batch(network net)
|
||||
{
|
||||
@ -50,6 +79,7 @@ float get_current_rate(network net)
|
||||
int batch_num = get_current_batch(net);
|
||||
int i;
|
||||
float rate;
|
||||
if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power);
|
||||
switch (net.policy) {
|
||||
case CONSTANT:
|
||||
return net.learning_rate;
|
||||
@ -66,7 +96,6 @@ float get_current_rate(network net)
|
||||
case EXP:
|
||||
return net.learning_rate * pow(net.gamma, batch_num);
|
||||
case POLY:
|
||||
if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power);
|
||||
return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power);
|
||||
case RANDOM:
|
||||
return net.learning_rate * pow(rand_uniform(0,1), net.power);
|
||||
@ -150,7 +179,7 @@ void forward_network(network net, network_state state)
|
||||
state.index = i;
|
||||
layer l = net.layers[i];
|
||||
if(l.delta){
|
||||
scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
|
||||
fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
|
||||
}
|
||||
l.forward(l, state);
|
||||
state.input = l.output;
|
||||
@ -165,7 +194,7 @@ void update_network(network net)
|
||||
for(i = 0; i < net.n; ++i){
|
||||
layer l = net.layers[i];
|
||||
if(l.update){
|
||||
l.update(l, update_batch, rate, net.momentum, net.decay);
|
||||
l.update(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -218,6 +247,7 @@ void backward_network(network net, network_state state)
|
||||
state.delta = prev.delta;
|
||||
}
|
||||
layer l = net.layers[i];
|
||||
if(l.stopbackward) break;
|
||||
l.backward(l, state);
|
||||
}
|
||||
}
|
||||
@ -414,6 +444,9 @@ detection_layer get_network_detection_layer(network net)
|
||||
image get_network_image_layer(network net, int i)
|
||||
{
|
||||
layer l = net.layers[i];
|
||||
#ifdef GPU
|
||||
cuda_pull_array(l.output_gpu, l.output, l.outputs);
|
||||
#endif
|
||||
if (l.out_w && l.out_h && l.out_c){
|
||||
return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
|
||||
}
|
||||
|
@ -43,6 +43,7 @@ typedef struct network{
|
||||
float eps;
|
||||
|
||||
int inputs;
|
||||
int notruth;
|
||||
int h, w, c;
|
||||
int max_crop;
|
||||
int min_crop;
|
||||
@ -82,6 +83,7 @@ float *get_network_output_gpu(network net);
|
||||
void forward_network_gpu(network net, network_state state);
|
||||
void backward_network_gpu(network net, network_state state);
|
||||
void update_network_gpu(network net);
|
||||
void harmless_update_network_gpu(network net);
|
||||
#endif
|
||||
|
||||
float get_current_rate(network net);
|
||||
@ -121,6 +123,8 @@ int resize_network(network *net, int w, int h);
|
||||
void set_batch_network(network *net, int b);
|
||||
int get_network_input_size(network net);
|
||||
float get_network_cost(network net);
|
||||
network load_network(char *cfg, char *weights, int clear);
|
||||
load_args get_base_args(network net);
|
||||
|
||||
int get_network_nuisance(network net);
|
||||
int get_network_background(network net);
|
||||
|
@ -50,8 +50,10 @@ void forward_network_gpu(network net, network_state state)
|
||||
if(l.delta_gpu){
|
||||
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
|
||||
}
|
||||
//if(l.c ==3 && i > 5) state.input = *net.input_gpu;
|
||||
l.forward_gpu(l, state);
|
||||
state.input = l.output_gpu;
|
||||
if(l.truth) state.truth = l.output_gpu;
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,6 +66,7 @@ void backward_network_gpu(network net, network_state state)
|
||||
for(i = net.n-1; i >= 0; --i){
|
||||
state.index = i;
|
||||
layer l = net.layers[i];
|
||||
if(l.stopbackward) break;
|
||||
if(i == 0){
|
||||
state.input = original_input;
|
||||
state.delta = original_delta;
|
||||
@ -86,11 +89,18 @@ void update_network_gpu(network net)
|
||||
layer l = net.layers[i];
|
||||
l.t = get_current_batch(net);
|
||||
if(l.update_gpu){
|
||||
l.update_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
** Flush a network's accumulated GPU updates without changing any weights:
** a learning rate of 0 zeroes the gradient step while momentum 1 carries the
** velocity through unchanged.  `net` is passed by value, so the caller's
** real hyper-parameters are untouched.
*/
void harmless_update_network_gpu(network net)
{
    net.momentum = 1;
    net.learning_rate = 0;
    update_network_gpu(net);
}
|
||||
|
||||
void forward_backward_network_gpu(network net, float *x, float *y)
|
||||
{
|
||||
network_state state;
|
||||
@ -101,10 +111,10 @@ void forward_backward_network_gpu(network net, float *x, float *y)
|
||||
if(net.layers[net.n-1].truths) y_size = net.layers[net.n-1].truths*net.batch;
|
||||
if(!*net.input_gpu){
|
||||
*net.input_gpu = cuda_make_array(x, x_size);
|
||||
*net.truth_gpu = cuda_make_array(y, y_size);
|
||||
if(!net.notruth) *net.truth_gpu = cuda_make_array(y, y_size);
|
||||
}else{
|
||||
cuda_push_array(*net.input_gpu, x, x_size);
|
||||
cuda_push_array(*net.truth_gpu, y, y_size);
|
||||
if(!net.notruth) cuda_push_array(*net.truth_gpu, y, y_size);
|
||||
}
|
||||
state.input = *net.input_gpu;
|
||||
state.delta = 0;
|
||||
@ -180,7 +190,7 @@ void update_layer(layer l, network net)
|
||||
float rate = get_current_rate(net);
|
||||
l.t = get_current_batch(net);
|
||||
if(l.update_gpu){
|
||||
l.update_gpu(l, update_batch, rate, net.momentum, net.decay);
|
||||
l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
|
||||
}
|
||||
}
|
||||
|
||||
|
114
src/nightmare.c
114
src/nightmare.c
@ -52,6 +52,7 @@ void optimize_picture(network *net, image orig, int max_layer, float scale, floa
|
||||
image delta = make_image(im.w, im.h, im.c);
|
||||
|
||||
network_state state = {0};
|
||||
state.net = *net;
|
||||
|
||||
#ifdef GPU
|
||||
state.input = cuda_make_array(im.data, im.w*im.h*im.c);
|
||||
@ -142,6 +143,7 @@ void reconstruct_picture(network net, float *features, image recon, image update
|
||||
image delta = make_image(recon.w, recon.h, recon.c);
|
||||
|
||||
network_state state = {0};
|
||||
state.net = net;
|
||||
#ifdef GPU
|
||||
state.input = cuda_make_array(recon.data, recon.w*recon.h*recon.c);
|
||||
state.delta = cuda_make_array(delta.data, delta.w*delta.h*delta.c);
|
||||
@ -178,6 +180,113 @@ void reconstruct_picture(network net, float *features, image recon, image update
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void run_lsd(int argc, char **argv)
|
||||
{
|
||||
srand(0);
|
||||
if(argc < 3){
|
||||
fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
char *cfg = argv[2];
|
||||
char *weights = argv[3];
|
||||
char *input = argv[4];
|
||||
|
||||
int norm = find_int_arg(argc, argv, "-norm", 1);
|
||||
int rounds = find_int_arg(argc, argv, "-rounds", 1);
|
||||
int iters = find_int_arg(argc, argv, "-iters", 10);
|
||||
float rate = find_float_arg(argc, argv, "-rate", .04);
|
||||
float momentum = find_float_arg(argc, argv, "-momentum", .9);
|
||||
float lambda = find_float_arg(argc, argv, "-lambda", .01);
|
||||
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
|
||||
int reconstruct = find_arg(argc, argv, "-reconstruct");
|
||||
int smooth_size = find_int_arg(argc, argv, "-smooth", 1);
|
||||
|
||||
network net = parse_network_cfg(cfg);
|
||||
load_weights(&net, weights);
|
||||
char *cfgbase = basecfg(cfg);
|
||||
char *imbase = basecfg(input);
|
||||
|
||||
set_batch_network(&net, 1);
|
||||
image im = load_image_color(input, 0, 0);
|
||||
|
||||
float *features = 0;
|
||||
image update;
|
||||
if (reconstruct){
|
||||
im = letterbox_image(im, net.w, net.h);
|
||||
|
||||
int zz = 0;
|
||||
network_predict(net, im.data);
|
||||
image out_im = get_network_image(net);
|
||||
image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz);
|
||||
//flip_image(crop);
|
||||
image f_im = resize_image(crop, out_im.w, out_im.h);
|
||||
free_image(crop);
|
||||
printf("%d features\n", out_im.w*out_im.h*out_im.c);
|
||||
|
||||
|
||||
im = resize_image(im, im.w, im.h);
|
||||
f_im = resize_image(f_im, f_im.w, f_im.h);
|
||||
features = f_im.data;
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 14*14*512; ++i){
|
||||
features[i] += rand_uniform(-.19, .19);
|
||||
}
|
||||
|
||||
free_image(im);
|
||||
im = make_random_image(im.w, im.h, im.c);
|
||||
update = make_image(im.w, im.h, im.c);
|
||||
|
||||
}
|
||||
|
||||
int e;
|
||||
int n;
|
||||
for(e = 0; e < rounds; ++e){
|
||||
fprintf(stderr, "Iteration: ");
|
||||
fflush(stderr);
|
||||
for(n = 0; n < iters; ++n){
|
||||
fprintf(stderr, "%d, ", n);
|
||||
fflush(stderr);
|
||||
if(reconstruct){
|
||||
reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
|
||||
//if ((n+1)%30 == 0) rate *= .5;
|
||||
show_image(im, "reconstruction");
|
||||
#ifdef OPENCV
|
||||
cvWaitKey(10);
|
||||
#endif
|
||||
}else{
|
||||
int layer = max_layer + rand()%range - range/2;
|
||||
int octave = rand()%octaves;
|
||||
optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "done\n");
|
||||
char buff[256];
|
||||
if (prefix){
|
||||
sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e);
|
||||
}else{
|
||||
sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e);
|
||||
}
|
||||
printf("%d %s\n", e, buff);
|
||||
save_image(im, buff);
|
||||
//show_image(im, buff);
|
||||
//cvWaitKey(0);
|
||||
|
||||
if(rotate){
|
||||
image rot = rotate_image(im, rotate);
|
||||
free_image(im);
|
||||
im = rot;
|
||||
}
|
||||
image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom);
|
||||
image resized = resize_image(crop, im.w, im.h);
|
||||
free_image(im);
|
||||
free_image(crop);
|
||||
im = resized;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
void run_nightmare(int argc, char **argv)
|
||||
{
|
||||
@ -224,6 +333,7 @@ void run_nightmare(int argc, char **argv)
|
||||
free_image(im);
|
||||
im = resized;
|
||||
}
|
||||
im = letterbox_image(im, net.w, net.h);
|
||||
|
||||
float *features = 0;
|
||||
image update;
|
||||
@ -246,13 +356,11 @@ void run_nightmare(int argc, char **argv)
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 14*14*512; ++i){
|
||||
features[i] += rand_uniform(-.19, .19);
|
||||
//features[i] += rand_uniform(-.19, .19);
|
||||
}
|
||||
|
||||
free_image(im);
|
||||
im = make_random_image(im.w, im.h, im.c);
|
||||
update = make_image(im.w, im.h, im.c);
|
||||
|
||||
}
|
||||
|
||||
int e;
|
||||
|
51
src/parser.c
51
src/parser.c
@ -9,6 +9,7 @@
|
||||
#include "batchnorm_layer.h"
|
||||
#include "blas.h"
|
||||
#include "connected_layer.h"
|
||||
#include "deconvolutional_layer.h"
|
||||
#include "convolutional_layer.h"
|
||||
#include "cost_layer.h"
|
||||
#include "crnn_layer.h"
|
||||
@ -48,6 +49,8 @@ LAYER_TYPE string_to_layer_type(char * type)
|
||||
if (strcmp(type, "[local]")==0) return LOCAL;
|
||||
if (strcmp(type, "[conv]")==0
|
||||
|| strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
|
||||
if (strcmp(type, "[deconv]")==0
|
||||
|| strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
|
||||
if (strcmp(type, "[activation]")==0) return ACTIVE;
|
||||
if (strcmp(type, "[net]")==0
|
||||
|| strcmp(type, "[network]")==0) return NETWORK;
|
||||
@ -135,6 +138,29 @@ local_layer parse_local(list *options, size_params params)
|
||||
return layer;
|
||||
}
|
||||
|
||||
/*
** Parse a [deconvolutional] cfg section into a layer.  Requires the
** preceding layer to have produced a spatial (h x w x c) output.
*/
layer parse_deconvolutional(list *options, size_params params)
{
    int filters = option_find_int(options, "filters",1);
    int ksize   = option_find_int(options, "size",1);
    int stride  = option_find_int(options, "stride",1);

    ACTIVATION activation = get_activation(option_find_str(options, "activation", "logistic"));

    int h = params.h;
    int w = params.w;
    int c = params.c;
    int batch = params.batch;
    if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");

    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

    return make_deconvolutional_layer(batch, h, w, c, filters, ksize, stride, activation, batch_normalize);
}
|
||||
|
||||
|
||||
convolutional_layer parse_convolutional(list *options, size_params params)
|
||||
{
|
||||
int n = option_find_int(options, "filters",1);
|
||||
@ -312,6 +338,7 @@ cost_layer parse_cost(list *options, size_params params)
|
||||
float scale = option_find_float_quiet(options, "scale",1);
|
||||
cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
|
||||
layer.ratio = option_find_float_quiet(options, "ratio",0);
|
||||
layer.thresh = option_find_float_quiet(options, "thresh",0);
|
||||
return layer;
|
||||
}
|
||||
|
||||
@ -343,6 +370,8 @@ layer parse_reorg(list *options, size_params params)
|
||||
{
|
||||
int stride = option_find_int(options, "stride",1);
|
||||
int reverse = option_find_int_quiet(options, "reverse",0);
|
||||
int flatten = option_find_int_quiet(options, "flatten",0);
|
||||
int extra = option_find_int_quiet(options, "extra",0);
|
||||
|
||||
int batch,h,w,c;
|
||||
h = params.h;
|
||||
@ -351,7 +380,7 @@ layer parse_reorg(list *options, size_params params)
|
||||
batch=params.batch;
|
||||
if(!(h && w && c)) error("Layer before reorg layer must output image.");
|
||||
|
||||
layer layer = make_reorg_layer(batch,w,h,c,stride,reverse);
|
||||
layer layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra);
|
||||
return layer;
|
||||
}
|
||||
|
||||
@ -508,6 +537,7 @@ void parse_net_options(list *options, network *net)
|
||||
net->decay = option_find_float(options, "decay", .0001);
|
||||
int subdivs = option_find_int(options, "subdivisions",1);
|
||||
net->time_steps = option_find_int_quiet(options, "time_steps",1);
|
||||
net->notruth = option_find_int_quiet(options, "notruth",0);
|
||||
net->batch /= subdivs;
|
||||
net->batch *= net->time_steps;
|
||||
net->subdivisions = subdivs;
|
||||
@ -537,6 +567,7 @@ void parse_net_options(list *options, network *net)
|
||||
char *policy_s = option_find_str(options, "policy", "constant");
|
||||
net->policy = get_policy(policy_s);
|
||||
net->burn_in = option_find_int_quiet(options, "burn_in", 0);
|
||||
net->power = option_find_float_quiet(options, "power", 4);
|
||||
if(net->policy == STEP){
|
||||
net->step = option_find_int(options, "step", 1);
|
||||
net->scale = option_find_float(options, "scale", 1);
|
||||
@ -570,7 +601,6 @@ void parse_net_options(list *options, network *net)
|
||||
net->gamma = option_find_float(options, "gamma", 1);
|
||||
net->step = option_find_int(options, "step", 1);
|
||||
} else if (net->policy == POLY || net->policy == RANDOM){
|
||||
net->power = option_find_float(options, "power", 1);
|
||||
}
|
||||
net->max_batches = option_find_int(options, "max_batches", 0);
|
||||
}
|
||||
@ -617,6 +647,8 @@ network parse_network_cfg(char *filename)
|
||||
LAYER_TYPE lt = string_to_layer_type(s->type);
|
||||
if(lt == CONVOLUTIONAL){
|
||||
l = parse_convolutional(options, params);
|
||||
}else if(lt == DECONVOLUTIONAL){
|
||||
l = parse_deconvolutional(options, params);
|
||||
}else if(lt == LOCAL){
|
||||
l = parse_local(options, params);
|
||||
}else if(lt == ACTIVE){
|
||||
@ -665,8 +697,13 @@ network parse_network_cfg(char *filename)
|
||||
}else{
|
||||
fprintf(stderr, "Type not recognized: %s\n", s->type);
|
||||
}
|
||||
l.truth = option_find_int_quiet(options, "truth", 0);
|
||||
l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
|
||||
l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
|
||||
l.dontload = option_find_int_quiet(options, "dontload", 0);
|
||||
l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
|
||||
l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
|
||||
l.smooth = option_find_float_quiet(options, "smooth", 0);
|
||||
option_unused(options);
|
||||
net.layers[count] = l;
|
||||
if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
|
||||
@ -840,7 +877,7 @@ void save_weights_upto(network net, char *filename, int cutoff)
|
||||
int i;
|
||||
for(i = 0; i < net.n && i < cutoff; ++i){
|
||||
layer l = net.layers[i];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
|
||||
save_convolutional_weights(l, fp);
|
||||
} if(l.type == CONNECTED){
|
||||
save_connected_weights(l, fp);
|
||||
@ -1005,7 +1042,7 @@ void load_convolutional_weights(layer l, FILE *fp)
|
||||
}
|
||||
|
||||
|
||||
void load_weights_upto(network *net, char *filename, int cutoff)
|
||||
void load_weights_upto(network *net, char *filename, int start, int cutoff)
|
||||
{
|
||||
#ifdef GPU
|
||||
if(net->gpu_index >= 0){
|
||||
@ -1027,10 +1064,10 @@ void load_weights_upto(network *net, char *filename, int cutoff)
|
||||
int transpose = (major > 1000) || (minor > 1000);
|
||||
|
||||
int i;
|
||||
for(i = 0; i < net->n && i < cutoff; ++i){
|
||||
for(i = start; i < net->n && i < cutoff; ++i){
|
||||
layer l = net->layers[i];
|
||||
if (l.dontload) continue;
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
|
||||
load_convolutional_weights(l, fp);
|
||||
}
|
||||
if(l.type == CONNECTED){
|
||||
@ -1075,6 +1112,6 @@ void load_weights_upto(network *net, char *filename, int cutoff)
|
||||
|
||||
void load_weights(network *net, char *filename)
|
||||
{
|
||||
load_weights_upto(net, filename, net->n);
|
||||
load_weights_upto(net, filename, 0, net->n);
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,6 @@ void save_weights(network net, char *filename);
|
||||
void save_weights_upto(network net, char *filename, int cutoff);
|
||||
void save_weights_double(network net, char *filename);
|
||||
void load_weights(network *net, char *filename);
|
||||
void load_weights_upto(network *net, char *filename, int cutoff);
|
||||
void load_weights_upto(network *net, char *filename, int start, int cutoff);
|
||||
|
||||
#endif
|
||||
|
@ -18,6 +18,10 @@ layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
|
||||
l.batch = batch;
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = n*(classes + coords + 1);
|
||||
l.out_w = l.w;
|
||||
l.out_h = l.h;
|
||||
l.out_c = l.c;
|
||||
l.classes = classes;
|
||||
l.coords = coords;
|
||||
l.cost = calloc(1, sizeof(float));
|
||||
@ -68,19 +72,19 @@ void resize_region_layer(layer *l, int w, int h)
|
||||
#endif
|
||||
}
|
||||
|
||||
box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h)
|
||||
box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
|
||||
{
|
||||
box b;
|
||||
b.x = (i + logistic_activate(x[index + 0])) / w;
|
||||
b.y = (j + logistic_activate(x[index + 1])) / h;
|
||||
b.w = exp(x[index + 2]) * biases[2*n] / w;
|
||||
b.h = exp(x[index + 3]) * biases[2*n+1] / h;
|
||||
b.x = (i + x[index + 0*stride]) / w;
|
||||
b.y = (j + x[index + 1*stride]) / h;
|
||||
b.w = exp(x[index + 2*stride]) * biases[2*n] / w;
|
||||
b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h;
|
||||
return b;
|
||||
}
|
||||
|
||||
float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale)
|
||||
float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride)
|
||||
{
|
||||
box pred = get_region_box(x, biases, n, index, i, j, w, h);
|
||||
box pred = get_region_box(x, biases, n, index, i, j, w, h, stride);
|
||||
float iou = box_iou(pred, truth);
|
||||
|
||||
float tx = (truth.x*w - i);
|
||||
@ -88,34 +92,34 @@ float delta_region_box(box truth, float *x, float *biases, int n, int index, int
|
||||
float tw = log(truth.w*w / biases[2*n]);
|
||||
float th = log(truth.h*h / biases[2*n + 1]);
|
||||
|
||||
delta[index + 0] = scale * (tx - logistic_activate(x[index + 0])) * logistic_gradient(logistic_activate(x[index + 0]));
|
||||
delta[index + 1] = scale * (ty - logistic_activate(x[index + 1])) * logistic_gradient(logistic_activate(x[index + 1]));
|
||||
delta[index + 2] = scale * (tw - x[index + 2]);
|
||||
delta[index + 3] = scale * (th - x[index + 3]);
|
||||
delta[index + 0*stride] = scale * (tx - x[index + 0*stride]);
|
||||
delta[index + 1*stride] = scale * (ty - x[index + 1*stride]);
|
||||
delta[index + 2*stride] = scale * (tw - x[index + 2*stride]);
|
||||
delta[index + 3*stride] = scale * (th - x[index + 3*stride]);
|
||||
return iou;
|
||||
}
|
||||
|
||||
void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, float *avg_cat)
|
||||
void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat)
|
||||
{
|
||||
int i, n;
|
||||
if(hier){
|
||||
float pred = 1;
|
||||
while(class >= 0){
|
||||
pred *= output[index + class];
|
||||
pred *= output[index + stride*class];
|
||||
int g = hier->group[class];
|
||||
int offset = hier->group_offset[g];
|
||||
for(i = 0; i < hier->group_size[g]; ++i){
|
||||
delta[index + offset + i] = scale * (0 - output[index + offset + i]);
|
||||
delta[index + stride*(offset + i)] = scale * (0 - output[index + stride*(offset + i)]);
|
||||
}
|
||||
delta[index + class] = scale * (1 - output[index + class]);
|
||||
delta[index + stride*class] = scale * (1 - output[index + stride*class]);
|
||||
|
||||
class = hier->parent[class];
|
||||
}
|
||||
*avg_cat += pred;
|
||||
} else {
|
||||
for(n = 0; n < classes; ++n){
|
||||
delta[index + n] = scale * (((n == class)?1 : 0) - output[index + n]);
|
||||
if(n == class) *avg_cat += output[index + n];
|
||||
delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]);
|
||||
if(n == class) *avg_cat += output[index + stride*n];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -130,42 +134,35 @@ float tisnan(float x)
|
||||
return (x != x);
|
||||
}
|
||||
|
||||
int entry_index(layer l, int batch, int location, int entry)
|
||||
{
|
||||
int n = location / (l.w*l.h);
|
||||
int loc = location % (l.w*l.h);
|
||||
return batch*l.outputs + n*l.w*l.h*(l.coords+l.classes+1) + entry*l.w*l.h + loc;
|
||||
}
|
||||
|
||||
void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output);
|
||||
void forward_region_layer(const layer l, network_state state)
|
||||
{
|
||||
int i,j,b,t,n;
|
||||
int size = l.coords + l.classes + 1;
|
||||
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
|
||||
#ifndef GPU
|
||||
flatten(l.output, l.w*l.h, size*l.n, l.batch, 1);
|
||||
#endif
|
||||
for (b = 0; b < l.batch; ++b){
|
||||
for(i = 0; i < l.h*l.w*l.n; ++i){
|
||||
int index = size*i + b*l.outputs;
|
||||
l.output[index + 4] = logistic_activate(l.output[index + 4]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifndef GPU
|
||||
if (l.softmax_tree){
|
||||
for (b = 0; b < l.batch; ++b){
|
||||
for(i = 0; i < l.h*l.w*l.n; ++i){
|
||||
int index = size*i + b*l.outputs;
|
||||
softmax_tree(l.output + index + 5, 1, 0, 1, l.softmax_tree, l.output + index + 5);
|
||||
}
|
||||
int i;
|
||||
int count = 5;
|
||||
for (i = 0; i < l.softmax_tree->groups; ++i) {
|
||||
int group_size = l.softmax_tree->group_size[i];
|
||||
softmax_cpu(state.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count);
|
||||
count += group_size;
|
||||
}
|
||||
} else if (l.softmax){
|
||||
for (b = 0; b < l.batch; ++b){
|
||||
for(i = 0; i < l.h*l.w*l.n; ++i){
|
||||
int index = size*i + b*l.outputs;
|
||||
softmax(l.output + index + 5, l.classes, 1, l.output + index + 5);
|
||||
}
|
||||
}
|
||||
softmax_cpu(state.input + 5, l.classes, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + 5);
|
||||
}
|
||||
#endif
|
||||
if(!state.train) return;
|
||||
|
||||
memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
|
||||
if(!state.train) return;
|
||||
float avg_iou = 0;
|
||||
float recall = 0;
|
||||
float avg_cat = 0;
|
||||
@ -178,26 +175,28 @@ void forward_region_layer(const layer l, network_state state)
|
||||
if(l.softmax_tree){
|
||||
int onlyclass = 0;
|
||||
for(t = 0; t < 30; ++t){
|
||||
box truth = float_to_box(state.truth + t*5 + b*l.truths);
|
||||
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
|
||||
if(!truth.x) break;
|
||||
int class = state.truth[t*5 + b*l.truths + 4];
|
||||
float maxp = 0;
|
||||
int maxi = 0;
|
||||
if(truth.x > 100000 && truth.y > 100000){
|
||||
for(n = 0; n < l.n*l.w*l.h; ++n){
|
||||
int index = size*n + b*l.outputs + 5;
|
||||
float scale = l.output[index-1];
|
||||
l.delta[index - 1] = l.noobject_scale * ((0 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
|
||||
float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class);
|
||||
int class_index = entry_index(l, b, n, 5);
|
||||
int obj_index = entry_index(l, b, n, 4);
|
||||
float scale = l.output[obj_index];
|
||||
l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]);
|
||||
float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h);
|
||||
if(p > maxp){
|
||||
maxp = p;
|
||||
maxi = n;
|
||||
}
|
||||
}
|
||||
int index = size*maxi + b*l.outputs + 5;
|
||||
delta_region_class(l.output, l.delta, index, class, l.classes, l.softmax_tree, l.class_scale, &avg_cat);
|
||||
if(l.output[index - 1] < .3) l.delta[index - 1] = l.object_scale * ((.3 - l.output[index - 1]) * logistic_gradient(l.output[index - 1]));
|
||||
else l.delta[index - 1] = 0;
|
||||
int class_index = entry_index(l, b, maxi, 5);
|
||||
int obj_index = entry_index(l, b, maxi, 4);
|
||||
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
|
||||
if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]);
|
||||
else l.delta[obj_index] = 0;
|
||||
++class_count;
|
||||
onlyclass = 1;
|
||||
break;
|
||||
@ -208,21 +207,22 @@ void forward_region_layer(const layer l, network_state state)
|
||||
for (j = 0; j < l.h; ++j) {
|
||||
for (i = 0; i < l.w; ++i) {
|
||||
for (n = 0; n < l.n; ++n) {
|
||||
int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
|
||||
box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
|
||||
int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
|
||||
box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
|
||||
float best_iou = 0;
|
||||
for(t = 0; t < 30; ++t){
|
||||
box truth = float_to_box(state.truth + t*5 + b*l.truths);
|
||||
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
|
||||
if(!truth.x) break;
|
||||
float iou = box_iou(pred, truth);
|
||||
if (iou > best_iou) {
|
||||
best_iou = iou;
|
||||
}
|
||||
}
|
||||
avg_anyobj += l.output[index + 4];
|
||||
l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
|
||||
int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4);
|
||||
avg_anyobj += l.output[obj_index];
|
||||
l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]);
|
||||
if (best_iou > l.thresh) {
|
||||
l.delta[index + 4] = 0;
|
||||
l.delta[obj_index] = 0;
|
||||
}
|
||||
|
||||
if(*(state.net.seen) < 12800){
|
||||
@ -231,17 +231,16 @@ void forward_region_layer(const layer l, network_state state)
|
||||
truth.y = (j + .5)/l.h;
|
||||
truth.w = l.biases[2*n]/l.w;
|
||||
truth.h = l.biases[2*n+1]/l.h;
|
||||
delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01);
|
||||
delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for(t = 0; t < 30; ++t){
|
||||
box truth = float_to_box(state.truth + t*5 + b*l.truths);
|
||||
box truth = float_to_box(state.truth + t*5 + b*l.truths, 1);
|
||||
|
||||
if(!truth.x) break;
|
||||
float best_iou = 0;
|
||||
int best_index = 0;
|
||||
int best_n = 0;
|
||||
i = (truth.x * l.w);
|
||||
j = (truth.y * l.h);
|
||||
@ -251,8 +250,8 @@ void forward_region_layer(const layer l, network_state state)
|
||||
truth_shift.y = 0;
|
||||
//printf("index %d %d\n",i, j);
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
|
||||
box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
|
||||
int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
|
||||
box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
|
||||
if(l.bias_match){
|
||||
pred.w = l.biases[2*n]/l.w;
|
||||
pred.h = l.biases[2*n+1]/l.h;
|
||||
@ -262,80 +261,118 @@ void forward_region_layer(const layer l, network_state state)
|
||||
pred.y = 0;
|
||||
float iou = box_iou(pred, truth_shift);
|
||||
if (iou > best_iou){
|
||||
best_index = index;
|
||||
best_iou = iou;
|
||||
best_n = n;
|
||||
}
|
||||
}
|
||||
//printf("%d %f (%f, %f) %f x %f\n", best_n, best_iou, truth.x, truth.y, truth.w, truth.h);
|
||||
|
||||
float iou = delta_region_box(truth, l.output, l.biases, best_n, best_index, i, j, l.w, l.h, l.delta, l.coord_scale);
|
||||
int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0);
|
||||
float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h);
|
||||
if(iou > .5) recall += 1;
|
||||
avg_iou += iou;
|
||||
|
||||
//l.delta[best_index + 4] = iou - l.output[best_index + 4];
|
||||
avg_obj += l.output[best_index + 4];
|
||||
l.delta[best_index + 4] = l.object_scale * (1 - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
|
||||
int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4);
|
||||
avg_obj += l.output[obj_index];
|
||||
l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]);
|
||||
if (l.rescore) {
|
||||
l.delta[best_index + 4] = l.object_scale * (iou - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
|
||||
l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]);
|
||||
}
|
||||
|
||||
|
||||
int class = state.truth[t*5 + b*l.truths + 4];
|
||||
if (l.map) class = l.map[class];
|
||||
delta_region_class(l.output, l.delta, best_index + 5, class, l.classes, l.softmax_tree, l.class_scale, &avg_cat);
|
||||
int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 5);
|
||||
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
|
||||
++count;
|
||||
++class_count;
|
||||
}
|
||||
}
|
||||
//printf("\n");
|
||||
#ifndef GPU
|
||||
flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0);
|
||||
#endif
|
||||
*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
|
||||
printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
|
||||
}
|
||||
|
||||
void backward_region_layer(const layer l, network_state state)
|
||||
{
|
||||
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
|
||||
/*
|
||||
int b;
|
||||
int size = l.coords + l.classes + 1;
|
||||
for (b = 0; b < l.batch*l.n; ++b){
|
||||
int index = (b*size + 4)*l.w*l.h;
|
||||
gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index);
|
||||
}
|
||||
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
|
||||
*/
|
||||
}
|
||||
|
||||
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh)
|
||||
{
|
||||
int i,j,n;
|
||||
int i,j,n,z;
|
||||
float *predictions = l.output;
|
||||
if (l.batch == 2) {
|
||||
float *flip = l.output + l.outputs;
|
||||
for (j = 0; j < l.h; ++j) {
|
||||
for (i = 0; i < l.w/2; ++i) {
|
||||
for (n = 0; n < l.n; ++n) {
|
||||
for(z = 0; z < l.classes + 5; ++z){
|
||||
int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i;
|
||||
int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1);
|
||||
float swap = flip[i1];
|
||||
flip[i1] = flip[i2];
|
||||
flip[i2] = swap;
|
||||
if(z == 0){
|
||||
flip[i1] = -flip[i1];
|
||||
flip[i2] = -flip[i2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for(i = 0; i < l.outputs; ++i){
|
||||
l.output[i] = (l.output[i] + flip[i])/2.;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < l.w*l.h; ++i){
|
||||
int row = i / l.w;
|
||||
int col = i % l.w;
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = i*l.n + n;
|
||||
int p_index = index * (l.classes + 5) + 4;
|
||||
float scale = predictions[p_index];
|
||||
int box_index = index * (l.classes + 5);
|
||||
boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h);
|
||||
int index = n*l.w*l.h + i;
|
||||
int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
|
||||
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
|
||||
float scale = predictions[obj_index];
|
||||
boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h);
|
||||
if(1){
|
||||
int max = w > h ? w : h;
|
||||
boxes[index].x = (boxes[index].x - (max - w)/2./max) / ((float)w/max);
|
||||
boxes[index].y = (boxes[index].y - (max - h)/2./max) / ((float)h/max);
|
||||
boxes[index].w *= (float)max/w;
|
||||
boxes[index].h *= (float)max/h;
|
||||
}
|
||||
boxes[index].x *= w;
|
||||
boxes[index].y *= h;
|
||||
boxes[index].w *= w;
|
||||
boxes[index].h *= h;
|
||||
|
||||
int class_index = index * (l.classes + 5) + 5;
|
||||
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5);
|
||||
if(l.softmax_tree){
|
||||
|
||||
hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);
|
||||
hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h);
|
||||
if(map){
|
||||
for(j = 0; j < 200; ++j){
|
||||
float prob = scale*predictions[class_index+map[j]];
|
||||
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5 + map[j]);
|
||||
float prob = scale*predictions[class_index];
|
||||
probs[index][j] = (prob > thresh) ? prob : 0;
|
||||
}
|
||||
} else {
|
||||
int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh);
|
||||
int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h);
|
||||
probs[index][j] = (scale > thresh) ? scale : 0;
|
||||
probs[index][l.classes] = scale;
|
||||
}
|
||||
} else {
|
||||
for(j = 0; j < l.classes; ++j){
|
||||
float prob = scale*predictions[class_index+j];
|
||||
int class_index = entry_index(l, 0, n*l.w*l.h + i, 5 + j);
|
||||
float prob = scale*predictions[class_index];
|
||||
probs[index][j] = (prob > thresh) ? prob : 0;
|
||||
}
|
||||
}
|
||||
@ -350,23 +387,33 @@ void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *b
|
||||
|
||||
void forward_region_layer_gpu(const layer l, network_state state)
|
||||
{
|
||||
/*
|
||||
if(!state.train){
|
||||
copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
|
||||
return;
|
||||
}
|
||||
*/
|
||||
flatten_ongpu(state.input, l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch, 1, l.output_gpu);
|
||||
if(l.softmax_tree){
|
||||
copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
|
||||
int b, n;
|
||||
for (b = 0; b < l.batch; ++b){
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = entry_index(l, b, n*l.w*l.h, 0);
|
||||
activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC);
|
||||
index = entry_index(l, b, n*l.w*l.h, 4);
|
||||
activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC);
|
||||
}
|
||||
}
|
||||
if (l.softmax_tree){
|
||||
int i;
|
||||
int count = 5;
|
||||
for (i = 0; i < l.softmax_tree->groups; ++i) {
|
||||
int group_size = l.softmax_tree->group_size[i];
|
||||
softmax_gpu(l.output_gpu+count, group_size, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + count);
|
||||
int index = entry_index(l, 0, 0, count);
|
||||
softmax_gpu(state.input + index, group_size, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
|
||||
count += group_size;
|
||||
}
|
||||
}else if (l.softmax){
|
||||
softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5);
|
||||
} else if (l.softmax) {
|
||||
int index = entry_index(l, 0, 0, 5);
|
||||
//printf("%d\n", index);
|
||||
softmax_gpu(state.input + index, l.classes, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
|
||||
}
|
||||
if(!state.train || l.onlyforward){
|
||||
cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
|
||||
return;
|
||||
}
|
||||
|
||||
float *in_cpu = calloc(l.batch*l.inputs, sizeof(float));
|
||||
@ -382,16 +429,25 @@ void forward_region_layer_gpu(const layer l, network_state state)
|
||||
cpu_state.truth = truth_cpu;
|
||||
cpu_state.input = in_cpu;
|
||||
forward_region_layer(l, cpu_state);
|
||||
//cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
|
||||
cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
|
||||
free(cpu_state.input);
|
||||
if(!state.train) return;
|
||||
cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
|
||||
if(cpu_state.truth) free(cpu_state.truth);
|
||||
}
|
||||
|
||||
void backward_region_layer_gpu(layer l, network_state state)
|
||||
void backward_region_layer_gpu(const layer l, network_state state)
|
||||
{
|
||||
flatten_ongpu(l.delta_gpu, l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch, 0, state.delta);
|
||||
int b, n;
|
||||
for (b = 0; b < l.batch; ++b){
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = entry_index(l, b, n*l.w*l.h, 0);
|
||||
gradient_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index);
|
||||
index = entry_index(l, b, n*l.w*l.h, 4);
|
||||
gradient_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index);
|
||||
}
|
||||
}
|
||||
axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
261
src/regressor.c
Normal file
261
src/regressor.c
Normal file
@ -0,0 +1,261 @@
|
||||
#include "network.h"
|
||||
#include "utils.h"
|
||||
#include "parser.h"
|
||||
#include "option_list.h"
|
||||
#include "blas.h"
|
||||
#include "assert.h"
|
||||
#include "cuda.h"
|
||||
#include <sys/time.h>
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
image get_image_from_stream(CvCapture *cap);
|
||||
#endif
|
||||
|
||||
void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
|
||||
{
|
||||
int i;
|
||||
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
printf("%d\n", ngpus);
|
||||
network *nets = calloc(ngpus, sizeof(network));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
srand(seed);
|
||||
#ifdef GPU
|
||||
cuda_set_device(gpus[i]);
|
||||
#endif
|
||||
nets[i] = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&nets[i], weightfile);
|
||||
}
|
||||
if(clear) *nets[i].seen = 0;
|
||||
nets[i].learning_rate *= ngpus;
|
||||
}
|
||||
srand(time(0));
|
||||
network net = nets[0];
|
||||
|
||||
int imgs = net.batch * net.subdivisions * ngpus;
|
||||
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
list *options = read_data_cfg(datacfg);
|
||||
|
||||
char *backup_directory = option_find_str(options, "backup", "/backup/");
|
||||
char *train_list = option_find_str(options, "train", "data/train.list");
|
||||
|
||||
list *plist = get_paths(train_list);
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
int N = plist->size;
|
||||
clock_t time;
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.threads = 32;
|
||||
|
||||
args.min = net.min_crop;
|
||||
args.max = net.max_crop;
|
||||
args.angle = net.angle;
|
||||
args.aspect = net.aspect;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
args.size = net.w;
|
||||
|
||||
args.paths = paths;
|
||||
args.n = imgs;
|
||||
args.m = N;
|
||||
args.type = REGRESSION_DATA;
|
||||
|
||||
data train;
|
||||
data buffer;
|
||||
pthread_t load_thread;
|
||||
args.d = &buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
int epoch = (*net.seen)/N;
|
||||
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
||||
time=clock();
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
|
||||
float loss = 0;
|
||||
#ifdef GPU
|
||||
if(ngpus == 1){
|
||||
loss = train_network(net, train);
|
||||
} else {
|
||||
loss = train_networks(nets, ngpus, train, 4);
|
||||
}
|
||||
#else
|
||||
loss = train_network(net, train);
|
||||
#endif
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
||||
free_data(train);
|
||||
if(*net.seen/N > epoch){
|
||||
epoch = *net.seen/N;
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(get_current_batch(net)%100 == 0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup",backup_directory,base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
}
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.weights", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
|
||||
free_network(net);
|
||||
free_ptrs((void**)paths, plist->size);
|
||||
free_list(plist);
|
||||
free(base);
|
||||
}
|
||||
|
||||
void predict_regressor(char *cfgfile, char *weightfile, char *filename)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
|
||||
clock_t time;
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input, 0, 0);
|
||||
image sized = letterbox_image(im, net.w, net.h);
|
||||
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
printf("Predicted: %f\n", predictions[0]);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
free_image(im);
|
||||
free_image(sized);
|
||||
if (filename) break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
printf("Regressor Demo\n");
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 1);
|
||||
|
||||
srand(2222222);
|
||||
CvCapture * cap;
|
||||
|
||||
if(filename){
|
||||
cap = cvCaptureFromFile(filename);
|
||||
}else{
|
||||
cap = cvCaptureFromCAM(cam_index);
|
||||
}
|
||||
|
||||
if(!cap) error("Couldn't connect to webcam.\n");
|
||||
cvNamedWindow("Regressor", CV_WINDOW_NORMAL);
|
||||
cvResizeWindow("Regressor", 512, 512);
|
||||
float fps = 0;
|
||||
int i;
|
||||
|
||||
while(1){
|
||||
struct timeval tval_before, tval_after, tval_result;
|
||||
gettimeofday(&tval_before, NULL);
|
||||
|
||||
image in = get_image_from_stream(cap);
|
||||
image in_s = letterbox_image(in, net.w, net.h);
|
||||
show_image(in, "Regressor");
|
||||
|
||||
float *predictions = network_predict(net, in_s.data);
|
||||
|
||||
printf("\033[2J");
|
||||
printf("\033[1;1H");
|
||||
printf("\nFPS:%.0f\n",fps);
|
||||
|
||||
printf("People: %f\n", predictions[0]);
|
||||
|
||||
free_image(in_s);
|
||||
free_image(in);
|
||||
|
||||
cvWaitKey(10);
|
||||
|
||||
gettimeofday(&tval_after, NULL);
|
||||
timersub(&tval_after, &tval_before, &tval_result);
|
||||
float curr = 1000000.f/((long int)tval_result.tv_usec);
|
||||
fps = .9*fps + .1*curr;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void run_regressor(int argc, char **argv)
|
||||
{
|
||||
if(argc < 4){
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
|
||||
int *gpus = 0;
|
||||
int gpu = 0;
|
||||
int ngpus = 0;
|
||||
if(gpu_list){
|
||||
printf("%s\n", gpu_list);
|
||||
int len = strlen(gpu_list);
|
||||
ngpus = 1;
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
if (gpu_list[i] == ',') ++ngpus;
|
||||
}
|
||||
gpus = calloc(ngpus, sizeof(int));
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
gpus[i] = atoi(gpu_list);
|
||||
gpu_list = strchr(gpu_list, ',')+1;
|
||||
}
|
||||
} else {
|
||||
gpu = gpu_index;
|
||||
gpus = &gpu;
|
||||
ngpus = 1;
|
||||
}
|
||||
|
||||
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||
int clear = find_arg(argc, argv, "-clear");
|
||||
char *data = argv[3];
|
||||
char *cfg = argv[4];
|
||||
char *weights = (argc > 5) ? argv[5] : 0;
|
||||
char *filename = (argc > 6) ? argv[6]: 0;
|
||||
if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights);
|
||||
else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename);
|
||||
}
|
||||
|
||||
|
@ -4,15 +4,17 @@
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
|
||||
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra)
|
||||
{
|
||||
layer l = {0};
|
||||
l.type = REORG;
|
||||
l.batch = batch;
|
||||
l.stride = stride;
|
||||
l.extra = extra;
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.flatten = flatten;
|
||||
if(reverse){
|
||||
l.out_w = w*stride;
|
||||
l.out_h = h*stride;
|
||||
@ -23,10 +25,20 @@ layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse)
|
||||
l.out_c = c*(stride*stride);
|
||||
}
|
||||
l.reverse = reverse;
|
||||
fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c);
|
||||
|
||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||
l.inputs = h*w*c;
|
||||
int output_size = l.out_h * l.out_w * l.out_c * batch;
|
||||
if(l.extra){
|
||||
l.out_w = l.out_h = l.out_c = 0;
|
||||
l.outputs = l.inputs + l.extra;
|
||||
}
|
||||
|
||||
if(extra){
|
||||
fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, l.outputs);
|
||||
} else {
|
||||
fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c);
|
||||
}
|
||||
int output_size = l.outputs * batch;
|
||||
l.output = calloc(output_size, sizeof(float));
|
||||
l.delta = calloc(output_size, sizeof(float));
|
||||
|
||||
@ -77,17 +89,41 @@ void resize_reorg_layer(layer *l, int w, int h)
|
||||
|
||||
void forward_reorg_layer(const layer l, network_state state)
|
||||
{
|
||||
if(l.reverse){
|
||||
int i;
|
||||
if(l.flatten){
|
||||
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
|
||||
if(l.reverse){
|
||||
flatten(l.output, l.w*l.h, l.c, l.batch, 0);
|
||||
}else{
|
||||
flatten(l.output, l.w*l.h, l.c, l.batch, 1);
|
||||
}
|
||||
} else if (l.extra) {
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
copy_cpu(l.inputs, state.input + i*l.inputs, 1, l.output + i*l.outputs, 1);
|
||||
}
|
||||
} else if (l.reverse){
|
||||
reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output);
|
||||
}else {
|
||||
} else {
|
||||
reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output);
|
||||
}
|
||||
}
|
||||
|
||||
void backward_reorg_layer(const layer l, network_state state)
|
||||
{
|
||||
if(l.reverse){
|
||||
int i;
|
||||
if(l.flatten){
|
||||
memcpy(state.delta, l.delta, l.outputs*l.batch*sizeof(float));
|
||||
if(l.reverse){
|
||||
flatten(state.delta, l.w*l.h, l.c, l.batch, 1);
|
||||
}else{
|
||||
flatten(state.delta, l.w*l.h, l.c, l.batch, 0);
|
||||
}
|
||||
} else if(l.reverse){
|
||||
reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta);
|
||||
} else if (l.extra) {
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
copy_cpu(l.inputs, l.delta + i*l.outputs, 1, state.delta + i*l.inputs, 1);
|
||||
}
|
||||
}else{
|
||||
reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta);
|
||||
}
|
||||
@ -96,7 +132,18 @@ void backward_reorg_layer(const layer l, network_state state)
|
||||
#ifdef GPU
|
||||
void forward_reorg_layer_gpu(layer l, network_state state)
|
||||
{
|
||||
if(l.reverse){
|
||||
int i;
|
||||
if(l.flatten){
|
||||
if(l.reverse){
|
||||
flatten_ongpu(state.input, l.w*l.h, l.c, l.batch, 0, l.output_gpu);
|
||||
}else{
|
||||
flatten_ongpu(state.input, l.w*l.h, l.c, l.batch, 1, l.output_gpu);
|
||||
}
|
||||
} else if (l.extra) {
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
copy_ongpu(l.inputs, state.input + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1);
|
||||
}
|
||||
} else if (l.reverse) {
|
||||
reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu);
|
||||
}else {
|
||||
reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu);
|
||||
@ -105,9 +152,20 @@ void forward_reorg_layer_gpu(layer l, network_state state)
|
||||
|
||||
void backward_reorg_layer_gpu(layer l, network_state state)
|
||||
{
|
||||
if(l.reverse){
|
||||
if(l.flatten){
|
||||
if(l.reverse){
|
||||
flatten_ongpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, state.delta);
|
||||
}else{
|
||||
flatten_ongpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, state.delta);
|
||||
}
|
||||
} else if (l.extra) {
|
||||
int i;
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
copy_ongpu(l.inputs, l.delta_gpu + i*l.outputs, 1, state.delta + i*l.inputs, 1);
|
||||
}
|
||||
} else if(l.reverse){
|
||||
reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta);
|
||||
}else{
|
||||
} else {
|
||||
reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta);
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
|
||||
layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse);
|
||||
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra);
|
||||
void resize_reorg_layer(layer *l, int w, int h);
|
||||
void forward_reorg_layer(const layer l, network_state state);
|
||||
void backward_reorg_layer(const layer l, network_state state);
|
||||
|
@ -32,40 +32,24 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups)
|
||||
return l;
|
||||
}
|
||||
|
||||
void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output)
|
||||
{
|
||||
int b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
int i;
|
||||
int count = 0;
|
||||
for(i = 0; i < hierarchy->groups; ++i){
|
||||
int group_size = hierarchy->group_size[i];
|
||||
softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count);
|
||||
count += group_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void forward_softmax_layer(const softmax_layer l, network_state state)
|
||||
{
|
||||
int b;
|
||||
int inputs = l.inputs / l.groups;
|
||||
int batch = l.batch * l.groups;
|
||||
if(l.softmax_tree){
|
||||
softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output);
|
||||
} else {
|
||||
for(b = 0; b < batch; ++b){
|
||||
softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs);
|
||||
int i;
|
||||
int count = 0;
|
||||
for (i = 0; i < l.softmax_tree->groups; ++i) {
|
||||
int group_size = l.softmax_tree->group_size[i];
|
||||
softmax_cpu(state.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count);
|
||||
count += group_size;
|
||||
}
|
||||
} else {
|
||||
softmax_cpu(state.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output);
|
||||
}
|
||||
}
|
||||
|
||||
void backward_softmax_layer(const softmax_layer l, network_state state)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < l.inputs*l.batch; ++i){
|
||||
state.delta[i] += l.delta[i];
|
||||
}
|
||||
axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, state.delta, 1);
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
@ -77,18 +61,16 @@ void pull_softmax_layer_output(const softmax_layer layer)
|
||||
|
||||
void forward_softmax_layer_gpu(const softmax_layer l, network_state state)
|
||||
{
|
||||
int inputs = l.inputs / l.groups;
|
||||
int batch = l.batch * l.groups;
|
||||
if(l.softmax_tree){
|
||||
int i;
|
||||
int count = 0;
|
||||
for (i = 0; i < l.softmax_tree->groups; ++i) {
|
||||
int group_size = l.softmax_tree->group_size[i];
|
||||
softmax_gpu(state.input+count, group_size, inputs, batch, l.temperature, l.output_gpu + count);
|
||||
softmax_gpu(state.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count);
|
||||
count += group_size;
|
||||
}
|
||||
} else {
|
||||
softmax_gpu(state.input, inputs, inputs, batch, l.temperature, l.output_gpu);
|
||||
softmax_gpu(state.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#endif
|
||||
|
||||
void train_super(char *cfgfile, char *weightfile)
|
||||
void train_super(char *cfgfile, char *weightfile, int clear)
|
||||
{
|
||||
char *train_images = "/data/imagenet/imagenet1k.train.list";
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
@ -19,6 +19,7 @@ void train_super(char *cfgfile, char *weightfile)
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
if(clear) *net.seen = 0;
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = net.batch*net.subdivisions;
|
||||
int i = *net.seen/imgs;
|
||||
@ -123,7 +124,8 @@ void run_super(int argc, char **argv)
|
||||
char *cfg = argv[3];
|
||||
char *weights = (argc > 4) ? argv[4] : 0;
|
||||
char *filename = (argc > 5) ? argv[5] : 0;
|
||||
if(0==strcmp(argv[2], "train")) train_super(cfg, weights);
|
||||
int clear = find_arg(argc, argv, "-clear");
|
||||
if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear);
|
||||
else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename);
|
||||
/*
|
||||
else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights);
|
||||
|
14
src/tree.c
14
src/tree.c
@ -24,33 +24,33 @@ void change_leaves(tree *t, char *leaf_list)
|
||||
fprintf(stderr, "Found %d leaves.\n", found);
|
||||
}
|
||||
|
||||
float get_hierarchy_probability(float *x, tree *hier, int c)
|
||||
float get_hierarchy_probability(float *x, tree *hier, int c, int stride)
|
||||
{
|
||||
float p = 1;
|
||||
while(c >= 0){
|
||||
p = p * x[c];
|
||||
p = p * x[c*stride];
|
||||
c = hier->parent[c];
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves)
|
||||
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride)
|
||||
{
|
||||
int j;
|
||||
for(j = 0; j < n; ++j){
|
||||
int parent = hier->parent[j];
|
||||
if(parent >= 0){
|
||||
predictions[j] *= predictions[parent];
|
||||
predictions[j*stride] *= predictions[parent*stride];
|
||||
}
|
||||
}
|
||||
if(only_leaves){
|
||||
for(j = 0; j < n; ++j){
|
||||
if(!hier->leaf[j]) predictions[j] = 0;
|
||||
if(!hier->leaf[j]) predictions[j*stride] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh)
|
||||
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride)
|
||||
{
|
||||
float p = 1;
|
||||
int group = 0;
|
||||
@ -61,7 +61,7 @@ int hierarchy_top_prediction(float *predictions, tree *hier, float thresh)
|
||||
|
||||
for(i = 0; i < hier->group_size[group]; ++i){
|
||||
int index = i + hier->group_offset[group];
|
||||
float val = predictions[i + hier->group_offset[group]];
|
||||
float val = predictions[(i + hier->group_offset[group])*stride];
|
||||
if(val > max){
|
||||
max_i = index;
|
||||
max = val;
|
||||
|
@ -15,9 +15,9 @@ typedef struct{
|
||||
} tree;
|
||||
|
||||
tree *read_tree(char *filename);
|
||||
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves);
|
||||
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride);
|
||||
void change_leaves(tree *t, char *leaf_list);
|
||||
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh);
|
||||
float get_hierarchy_probability(float *x, tree *hier, int c);
|
||||
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride);
|
||||
float get_hierarchy_probability(float *x, tree *hier, int c, int stride);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user