mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
updates and things
This commit is contained in:
parent
aebe937710
commit
8f1b4e0962
1
.gitignore
vendored
1
.gitignore
vendored
@ -14,6 +14,7 @@ decaf/
|
||||
submission/
|
||||
cfg/
|
||||
darknet
|
||||
.fuse*
|
||||
|
||||
# OS Generated #
|
||||
.DS_Store*
|
||||
|
8
Makefile
8
Makefile
@ -1,6 +1,6 @@
|
||||
GPU=1
|
||||
CUDNN=1
|
||||
OPENCV=1
|
||||
GPU=0
|
||||
CUDNN=0
|
||||
OPENCV=0
|
||||
DEBUG=0
|
||||
|
||||
ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
|
||||
@ -41,7 +41,7 @@ CFLAGS+= -DCUDNN
|
||||
LDFLAGS+= -lcudnn
|
||||
endif
|
||||
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
|
||||
ifeq ($(GPU), 1)
|
||||
LDFLAGS+= -lstdc++
|
||||
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
|
||||
|
209
cfg/extraction22k.cfg
Normal file
209
cfg/extraction22k.cfg
Normal file
@ -0,0 +1,209 @@
|
||||
[net]
|
||||
batch=128
|
||||
subdivisions=1
|
||||
height=224
|
||||
width=224
|
||||
max_crop=320
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
|
||||
learning_rate=0.01
|
||||
max_batches = 0
|
||||
policy=steps
|
||||
steps=444000,590000,970000
|
||||
scales=.5,.2,.1
|
||||
|
||||
#policy=sigmoid
|
||||
#gamma=.00008
|
||||
#step=100000
|
||||
#max_batches=200000
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=7
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=2048
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=2048
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[avgpool]
|
||||
|
||||
[connected]
|
||||
output=21842
|
||||
activation=leaky
|
||||
|
||||
[softmax]
|
||||
groups=1
|
||||
|
||||
[cost]
|
||||
type=sse
|
||||
|
126
cfg/go.test.cfg
126
cfg/go.test.cfg
@ -3,102 +3,126 @@ batch=1
|
||||
subdivisions=1
|
||||
height=19
|
||||
width=19
|
||||
channels=8
|
||||
channels=1
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
|
||||
learning_rate=0.1
|
||||
max_batches = 0
|
||||
policy=steps
|
||||
steps=50000
|
||||
scales=.1
|
||||
policy=poly
|
||||
power=4
|
||||
max_batches=400000
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
|
||||
[convolutional]
|
||||
filters=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=linear
|
||||
|
||||
[softmax]
|
||||
|
||||
|
@ -1,9 +1,8 @@
|
||||
classes=1000
|
||||
labels = data/inet.labels.list
|
||||
names = data/shortnames.txt
|
||||
train = /data/imagenet/imagenet1k.train.list
|
||||
valid = /data/imagenet/imagenet1k.valid.list
|
||||
top=5
|
||||
test = /Users/pjreddie/Documents/sites/selfie/paths.list
|
||||
backup = /home/pjreddie/backup/
|
||||
labels = data/imagenet.labels.list
|
||||
names = data/imagenet.shortnames.list
|
||||
top=5
|
||||
|
||||
|
16
cfg/yolo.cfg
16
cfg/yolo.cfg
@ -1,11 +1,14 @@
|
||||
[net]
|
||||
batch=64
|
||||
subdivisions=2
|
||||
batch=1
|
||||
subdivisions=1
|
||||
height=448
|
||||
width=448
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
saturation=1.5
|
||||
exposure=1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.0005
|
||||
policy=steps
|
||||
@ -13,15 +16,6 @@ steps=200,400,600,20000,30000
|
||||
scales=2.5,2,2,.1,.1
|
||||
max_batches = 40000
|
||||
|
||||
[crop]
|
||||
crop_width=448
|
||||
crop_height=448
|
||||
flip=0
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
noadjust=1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
|
257
cfg/yolo.train.cfg
Normal file
257
cfg/yolo.train.cfg
Normal file
@ -0,0 +1,257 @@
|
||||
[net]
|
||||
batch=64
|
||||
subdivisions=4
|
||||
height=448
|
||||
width=448
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
saturation=1.5
|
||||
exposure=1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.0005
|
||||
policy=steps
|
||||
steps=200,400,600,20000,30000
|
||||
scales=2.5,2,2,.1,.1
|
||||
max_batches = 40000
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=7
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=2
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[local]
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=256
|
||||
activation=leaky
|
||||
|
||||
[dropout]
|
||||
probability=.5
|
||||
|
||||
[connected]
|
||||
output= 1715
|
||||
activation=linear
|
||||
|
||||
[detection]
|
||||
classes=20
|
||||
coords=4
|
||||
rescore=1
|
||||
side=7
|
||||
num=3
|
||||
softmax=0
|
||||
sqrt=1
|
||||
jitter=.2
|
||||
|
||||
object_scale=1
|
||||
noobject_scale=.5
|
||||
class_scale=1
|
||||
coord_scale=5
|
||||
|
BIN
data/dog.jpg
BIN
data/dog.jpg
Binary file not shown.
Before Width: | Height: | Size: 160 KiB After Width: | Height: | Size: 160 KiB |
21842
data/imagenet.labels.list
Normal file
21842
data/imagenet.labels.list
Normal file
File diff suppressed because it is too large
Load Diff
21842
data/imagenet.shortnames.list
Normal file
21842
data/imagenet.shortnames.list
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1000
data/shortnames.txt
1000
data/shortnames.txt
File diff suppressed because it is too large
Load Diff
@ -40,6 +40,7 @@ void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float
|
||||
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
|
||||
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
|
||||
void scal_ongpu(int N, float ALPHA, float * X, int INCX);
|
||||
void supp_ongpu(int N, float ALPHA, float * X, int INCX);
|
||||
void mask_ongpu(int N, float * X, float mask_num, float * mask);
|
||||
void const_ongpu(int N, float ALPHA, float *X, int INCX);
|
||||
void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
|
||||
|
@ -368,6 +368,14 @@ __global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX)
|
||||
if(i < N) X[i*INCX] = min(ALPHA, max(-ALPHA, X[i*INCX]));
|
||||
}
|
||||
|
||||
// Suppression kernel: sets X[i*INCX] to 0 for every i < N whose magnitude is
// strictly smaller than |ALPHA|; all other elements are left untouched.
// Each thread handles the single element selected by its flattened
// block/thread index; threads with an index >= N do nothing.
//
// The magnitudes are compared directly (fabsf) rather than via their squares:
// squaring can underflow to 0 or overflow to inf in float, which made the
// comparison give the wrong answer for very small or very large values.
__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) {
        float x = X[i*INCX];    // load once instead of three times
        if(fabsf(x) < fabsf(ALPHA)) X[i*INCX] = 0;
    }
}
|
||||
|
||||
__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
@ -552,6 +560,12 @@ extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
// Host-side entry point for supp_kernel: launches it over N elements of X
// (stride INCX) with threshold ALPHA, then hands the launch status to
// check_error.
extern "C" void supp_ongpu(int N, float ALPHA, float * X, int INCX)
{
    supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);

    cudaError_t launch_status = cudaPeekAtLastError();
    check_error(launch_status);
}
|
||||
|
||||
extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||
{
|
||||
fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||
@ -633,6 +647,7 @@ extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *er
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
|
222
src/classifier.c
222
src/classifier.c
@ -39,6 +39,18 @@ list *read_data_cfg(char *filename)
|
||||
return options;
|
||||
}
|
||||
|
||||
// Splits each label of the form "<name> <value>" at its first space and
// returns the parsed values.
//
// labels: n writable strings. Each one that contains a space is truncated IN
//         PLACE at that space (the space is overwritten with '\0').
// n:      number of labels.
//
// Returns a calloc'd array of n floats owned by the caller (release with
// free), or NULL if the allocation fails. v[i] is atof() of the text after
// the first space; labels that are NULL or contain no space are left
// unmodified and their value stays 0.
float *get_regression_values(char **labels, int n)
{
    float *v = calloc(n, sizeof(float));
    int i;
    if(!v) return 0;
    for(i = 0; i < n; ++i){
        // strchr returns NULL when there is no space; the previous code
        // dereferenced that NULL.
        char *p = labels[i] ? strchr(labels[i], ' ') : 0;
        if(!p) continue;
        *p = 0;
        v[i] = atof(p+1);
    }
    return v;
}
|
||||
|
||||
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
||||
{
|
||||
int nthreads = 8;
|
||||
@ -85,6 +97,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
args.size = net.w;
|
||||
|
||||
args.paths = paths;
|
||||
@ -116,6 +129,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
|
||||
#ifdef OPENCV
|
||||
if(0){
|
||||
int u;
|
||||
for(u = 0; u < imgs; ++u){
|
||||
@ -124,6 +138,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
||||
cvWaitKey(0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
float loss = train_network(net, train);
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
@ -440,7 +455,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
|
||||
|
||||
char **labels = get_labels(label_list);
|
||||
list *plist = get_paths(valid_list);
|
||||
int scales[] = {192, 224, 288, 320, 352};
|
||||
int scales[] = {224, 288, 320, 352, 384};
|
||||
int nscales = sizeof(scales)/sizeof(scales[0]);
|
||||
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
@ -484,6 +499,88 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
|
||||
}
|
||||
}
|
||||
|
||||
void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
|
||||
list *options = read_data_cfg(datacfg);
|
||||
|
||||
char *name_list = option_find_str(options, "names", 0);
|
||||
if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
|
||||
int top = option_find_int(options, "top", 1);
|
||||
|
||||
int i = 0;
|
||||
char **names = get_labels(name_list);
|
||||
clock_t time;
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image orig = load_image_color(input, 0, 0);
|
||||
image r = resize_min(orig, 256);
|
||||
image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224);
|
||||
float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742};
|
||||
float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583};
|
||||
float var[3];
|
||||
var[0] = std[0]*std[0];
|
||||
var[1] = std[1]*std[1];
|
||||
var[2] = std[2]*std[2];
|
||||
|
||||
normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h);
|
||||
|
||||
float *X = im.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
|
||||
layer l = net.layers[layer_num];
|
||||
for(i = 0; i < l.c; ++i){
|
||||
if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]);
|
||||
}
|
||||
#ifdef GPU
|
||||
cuda_pull_array(l.output_gpu, l.output, l.outputs);
|
||||
#endif
|
||||
for(i = 0; i < l.outputs; ++i){
|
||||
printf("%f\n", l.output[i]);
|
||||
}
|
||||
/*
|
||||
|
||||
printf("\n\nWeights\n");
|
||||
for(i = 0; i < l.n*l.size*l.size*l.c; ++i){
|
||||
printf("%f\n", l.filters[i]);
|
||||
}
|
||||
|
||||
printf("\n\nBiases\n");
|
||||
for(i = 0; i < l.n; ++i){
|
||||
printf("%f\n", l.biases[i]);
|
||||
}
|
||||
*/
|
||||
|
||||
top_predictions(net, top, indexes);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
for(i = 0; i < top; ++i){
|
||||
int index = indexes[i];
|
||||
printf("%s: %f\n", names[index], predictions[index]);
|
||||
}
|
||||
free_image(im);
|
||||
if (filename) break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
@ -649,6 +746,127 @@ void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_
|
||||
}
|
||||
|
||||
|
||||
// Video demo that turns classifier output into a smoothed "threat" level and
// draws it as a gauge on every frame. Compiled to an empty function unless
// OPENCV is defined.
//
// datacfg:    data config; "top" = number of predictions to print (default 1),
//             "names" (falling back to "labels", then "data/labels.list") =
//             label file.
// cfgfile:    network definition passed to parse_network_cfg.
// weightfile: optional weights; skipped when NULL.
// cam_index:  camera index used when filename is NULL.
// filename:   optional video file to read instead of a camera.
//
// Per frame: the frame is resized to net.w x net.h and classified;
// curr_threat = .6*predictions[1] + predictions[2] is folded into `threat`
// as an exponential moving average (weight roll = .2); a vertical gauge and
// two marker boxes (highlighted above .57 and .97) are drawn onto the frame;
// the frame is written to the hard-coded path /home/pjreddie/tmp/threat_NNNNNN;
// and the FPS estimate plus top predictions are printed. The loop ends when
// the stream yields a frame with no data.
void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
    float threat = 0;
    float roll = .2;

    printf("Classifier Demo\n");
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
    CvCapture * cap;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    int top = option_find_int(options, "top", 1);

    // Same lookup as try_classifier: without the fallback a config lacking
    // "names" handed a NULL path to get_labels.
    char *name_list = option_find_str(options, "names", 0);
    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
    char **names = get_labels(name_list);

    int *indexes = calloc(top, sizeof(int));

    if(!cap) error("Couldn't connect to webcam.\n");
    //cvNamedWindow("Threat", CV_WINDOW_NORMAL);
    //cvResizeWindow("Threat", 512, 512);
    float fps = 0;
    int i;

    int count = 0;

    while(1){
        ++count;
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        if(!in.data) break;
        image in_s = resize_image(in, net.w, net.h);

        // out aliases in: drawing on out modifies the captured frame, and
        // free_image(in) below releases both.
        image out = in;
        int x1 = out.w / 20;
        int y1 = out.h / 20;
        int x2 = 2*x1;
        int y2 = out.h - out.h/20;

        int border = .01*out.h;
        int h = y2 - y1 - 2*border;
        int w = x2 - x1 - 2*border;

        float *predictions = network_predict(net, in_s.data);
        float curr_threat = predictions[0] * 0 + predictions[1] * .6 + predictions[2];
        threat = roll * curr_threat + (1-roll) * threat;

        // Upper marker (threshold .97).
        draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0);
        if(threat > .97) {
            draw_box_width(out,  x2 + .5 * w + border,
                    y1 + .02*h - 2*border,
                    x2 + .5 * w + 6*border,
                    y1 + .02*h + 3*border, 3*border, 1,0,0);
        }
        draw_box_width(out,  x2 + .5 * w + border,
                y1 + .02*h - 2*border,
                x2 + .5 * w + 6*border,
                y1 + .02*h + 3*border, .5*border, 0,0,0);
        // Lower marker (threshold .57).
        draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0);
        if(threat > .57) {
            draw_box_width(out,  x2 + .5 * w + border,
                    y1 + .42*h - 2*border,
                    x2 + .5 * w + 6*border,
                    y1 + .42*h + 3*border, 3*border, 1,1,0);
        }
        draw_box_width(out,  x2 + .5 * w + border,
                y1 + .42*h - 2*border,
                x2 + .5 * w + 6*border,
                y1 + .42*h + 3*border, .5*border, 0,0,0);

        // Gauge outline, then one row per unit of threat*h filled bottom-up;
        // the colour arguments go from (0,1,0) at the bottom to (1,0,0) at the top.
        draw_box_width(out, x1, y1, x2, y2, border, 0,0,0);
        for(i = 0; i < threat * h ; ++i){
            float ratio = (float) i / h;
            float r = (ratio < .5) ? (2*(ratio)) : 1;
            float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5);
            draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0);
        }
        top_predictions(net, top, indexes);
        char buff[256];
        sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count);
        save_image(out, buff);

        // Clear the terminal and move the cursor home before printing.
        printf("\033[2J");
        printf("\033[1;1H");
        printf("\nFPS:%.0f\n",fps);

        for(i = 0; i < top; ++i){
            int index = indexes[i];
            printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
        }

        if(0){
            show_image(out, "Threat");
            cvWaitKey(10);
        }
        free_image(in_s);
        free_image(in);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        // Use the whole elapsed time (seconds AND microseconds); the old code
        // looked at tv_usec only, so frames slower than one second were
        // mis-measured and tv_usec == 0 produced an infinite FPS.
        double elapsed_us = 1000000. * tval_result.tv_sec + tval_result.tv_usec;
        if(elapsed_us > 0){
            float curr = (float)(1000000. / elapsed_us);
            fps = .9*fps + .1*curr;
        }
    }
    free(indexes);
#endif
}
|
||||
|
||||
|
||||
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
@ -732,8 +950,10 @@ void run_classifier(int argc, char **argv)
|
||||
char *layer_s = (argc > 7) ? argv[7]: 0;
|
||||
int layer = layer_s ? atoi(layer_s) : -1;
|
||||
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename);
|
||||
else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
|
||||
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, clear);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
|
||||
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
|
||||
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
|
||||
else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights);
|
||||
|
11
src/coco.c
11
src/coco.c
@ -25,6 +25,7 @@ void train_coco(char *cfgfile, char *weightfile)
|
||||
//char *train_images = "/home/pjreddie/data/voc/test/train.txt";
|
||||
//char *train_images = "/home/pjreddie/data/coco/train.txt";
|
||||
char *train_images = "data/coco.trainval.txt";
|
||||
//char *train_images = "data/bags.train.list";
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
srand(time(0));
|
||||
data_seed = time(0);
|
||||
@ -63,6 +64,11 @@ void train_coco(char *cfgfile, char *weightfile)
|
||||
args.d = &buffer;
|
||||
args.type = REGION_DATA;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
@ -94,6 +100,11 @@ void train_coco(char *cfgfile, char *weightfile)
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(i%100==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
free_data(train);
|
||||
}
|
||||
char buff[256];
|
||||
|
10
src/col2im.c
10
src/col2im.c
@ -16,13 +16,9 @@ void col2im_cpu(float* data_col,
|
||||
int ksize, int stride, int pad, float* data_im)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = (height - ksize) / stride + 1;
|
||||
int width_col = (width - ksize) / stride + 1;
|
||||
if (pad){
|
||||
height_col = 1 + (height-1) / stride;
|
||||
width_col = 1 + (width-1) / stride;
|
||||
pad = ksize/2;
|
||||
}
|
||||
int height_col = (height + 2*pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2*pad - ksize) / stride + 1;
|
||||
|
||||
int channels_col = channels * ksize * ksize;
|
||||
for (c = 0; c < channels_col; ++c) {
|
||||
int w_offset = c % ksize;
|
||||
|
@ -46,7 +46,6 @@ void col2im_ongpu(float *data_col,
|
||||
int ksize, int stride, int pad, float *data_im){
|
||||
// We are going to launch channels * height_col * width_col kernels, each
|
||||
// kernel responsible for copying a single-channel grid.
|
||||
pad = pad ? ksize/2 : 0;
|
||||
int height_col = (height + 2 * pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2 * pad - ksize) / stride + 1;
|
||||
int num_kernels = channels * height * width;
|
||||
|
@ -17,7 +17,7 @@ __global__ void binarize_kernel(float *x, int n, float *binary)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (i >= n) return;
|
||||
binary[i] = (x[i] > 0) ? 1 : -1;
|
||||
binary[i] = (x[i] >= 0) ? 1 : -1;
|
||||
}
|
||||
|
||||
void binarize_gpu(float *x, int n, float *binary)
|
||||
@ -60,6 +60,7 @@ __global__ void binarize_filters_kernel(float *filters, int n, int size, float *
|
||||
mean = mean / size;
|
||||
for(i = 0; i < size; ++i){
|
||||
binary[f*size + i] = (filters[f*size + i] > 0) ? mean : -mean;
|
||||
//binary[f*size + i] = filters[f*size + i];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,18 +70,12 @@ void binarize_input(float *input, int n, int size, float *binary)
|
||||
|
||||
// Output height of convolutional layer l, with l.pad interpreted as the
// number of padding rows added on each side:
//     (h + 2*pad - size) / stride + 1   (integer division)
// This is the same arithmetic col2im_cpu uses for height_col. The earlier
// computation that treated l.pad as an on/off flag is dropped; as written it
// was followed by a second, unreachable return.
int convolutional_out_height(convolutional_layer l)
{
    return (l.h + 2*l.pad - l.size) / l.stride + 1;
}
|
||||
|
||||
// Output width of convolutional layer l, with l.pad interpreted as the
// number of padding columns added on each side:
//     (w + 2*pad - size) / stride + 1   (integer division)
// This is the same arithmetic col2im_cpu uses for width_col. The earlier
// computation that treated l.pad as an on/off flag is dropped; as written it
// was followed by a second, unreachable return.
int convolutional_out_width(convolutional_layer l)
{
    return (l.w + 2*l.pad - l.size) / l.stride + 1;
}
|
||||
|
||||
image get_convolutional_image(convolutional_layer l)
|
||||
@ -148,8 +142,7 @@ void cudnn_convolutional_setup(layer *l)
|
||||
cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
|
||||
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
cudnnSetFilter4dDescriptor(l->filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
|
||||
int padding = l->pad ? l->size/2 : 0;
|
||||
cudnnSetConvolution2dDescriptor(l->convDesc, padding, padding, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
|
||||
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
|
||||
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
|
||||
l->srcTensorDesc,
|
||||
l->filterDesc,
|
||||
@ -178,7 +171,7 @@ void cudnn_convolutional_setup(layer *l)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor)
|
||||
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor)
|
||||
{
|
||||
int i;
|
||||
convolutional_layer l = {0};
|
||||
@ -193,7 +186,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
||||
l.batch = batch;
|
||||
l.stride = stride;
|
||||
l.size = size;
|
||||
l.pad = pad;
|
||||
l.pad = padding;
|
||||
l.batch_normalize = batch_normalize;
|
||||
|
||||
l.filters = calloc(c*n*size*size, sizeof(float));
|
||||
|
@ -98,6 +98,15 @@ void push_cost_layer(cost_layer l)
|
||||
cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
|
||||
}
|
||||
|
||||
/*
 * qsort()-style comparator ordering floats by ascending absolute value.
 *
 * a, b: pointers to the two float elements being compared.
 * Returns a negative, zero or positive value when |*a| is respectively
 * less than, equal to or greater than |*b|.
 *
 * NaN handling: every ordinary comparison against NaN is false, which would
 * make NaN "equal" to all values and give qsort an inconsistent ordering.
 * NaNs are therefore treated as equal to each other and greater than any
 * number, so they collect at the end of a sorted array.
 */
int float_abs_compare(const void *a, const void *b)
{
    float fa = *(const float *)a;
    float fb = *(const float *)b;

    /* x != x holds only for NaN; avoids needing isnan() from <math.h>. */
    int a_nan = (fa != fa);
    int b_nan = (fb != fb);
    if (a_nan || b_nan) return a_nan - b_nan;

    if (fa < 0) fa = -fa;
    if (fb < 0) fb = -fb;
    return (fa > fb) - (fa < fb);
}
|
||||
|
||||
void forward_cost_layer_gpu(cost_layer l, network_state state)
|
||||
{
|
||||
if (!state.truth) return;
|
||||
@ -111,6 +120,16 @@ void forward_cost_layer_gpu(cost_layer l, network_state state)
|
||||
l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
|
||||
}
|
||||
|
||||
if(l.ratio){
|
||||
cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
|
||||
qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
|
||||
int n = (1-l.ratio) * l.batch*l.inputs;
|
||||
float thresh = l.delta[n];
|
||||
thresh = 0;
|
||||
printf("%f\n", thresh);
|
||||
supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
|
||||
}
|
||||
|
||||
cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
|
||||
l.cost[0] = sum_array(l.output, l.batch*l.inputs);
|
||||
}
|
||||
|
@ -13,7 +13,6 @@
|
||||
#endif
|
||||
|
||||
extern void run_voxel(int argc, char **argv);
|
||||
extern void run_imagenet(int argc, char **argv);
|
||||
extern void run_yolo(int argc, char **argv);
|
||||
extern void run_detector(int argc, char **argv);
|
||||
extern void run_coco(int argc, char **argv);
|
||||
@ -327,9 +326,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
#endif
|
||||
|
||||
if(0==strcmp(argv[1], "imagenet")){
|
||||
run_imagenet(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "average")){
|
||||
if (0 == strcmp(argv[1], "average")){
|
||||
average(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "yolo")){
|
||||
run_yolo(argc, argv);
|
||||
|
37
src/data.c
37
src/data.c
@ -100,7 +100,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
|
||||
return X;
|
||||
}
|
||||
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float exposure, float saturation)
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
||||
{
|
||||
int i;
|
||||
matrix X;
|
||||
@ -113,10 +113,7 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,
|
||||
image crop = random_augment_image(im, angle, min, max, size);
|
||||
int flip = rand_r(&data_seed)%2;
|
||||
if (flip) flip_image(crop);
|
||||
float exp = rand_uniform(1./exposure, exposure);
|
||||
float sat = rand_uniform(1./saturation, saturation);
|
||||
exposure_image(crop, exp);
|
||||
exposure_image(crop, sat);
|
||||
random_distort_image(crop, hue, saturation, exposure);
|
||||
|
||||
/*
|
||||
show_image(im, "orig");
|
||||
@ -241,6 +238,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
|
||||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".png", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPG", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
int count = 0;
|
||||
@ -287,6 +285,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
|
||||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".png", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPG", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
int count = 0;
|
||||
@ -443,7 +442,7 @@ void free_data(data d)
|
||||
}
|
||||
}
|
||||
|
||||
data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter)
|
||||
data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure)
|
||||
{
|
||||
char **random_paths = get_random_paths(paths, n, m);
|
||||
int i;
|
||||
@ -485,6 +484,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
|
||||
|
||||
image sized = resize_image(cropped, w, h);
|
||||
if(flip) flip_image(sized);
|
||||
random_distort_image(sized, hue, saturation, exposure);
|
||||
d.X.vals[i] = sized.data;
|
||||
|
||||
fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy);
|
||||
@ -611,7 +611,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter)
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
|
||||
{
|
||||
char **random_paths = get_random_paths(paths, n, m);
|
||||
int i;
|
||||
@ -651,6 +651,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
|
||||
|
||||
image sized = resize_image(cropped, w, h);
|
||||
if(flip) flip_image(sized);
|
||||
random_distort_image(sized, hue, saturation, exposure);
|
||||
d.X.vals[i] = sized.data;
|
||||
|
||||
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
|
||||
@ -679,17 +680,17 @@ void *load_thread(void *ptr)
|
||||
if (a.type == OLD_CLASSIFICATION_DATA){
|
||||
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||
} else if (a.type == CLASSIFICATION_DATA){
|
||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
|
||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == SUPER_DATA){
|
||||
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
|
||||
} else if (a.type == STUDY_DATA){
|
||||
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
|
||||
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == WRITING_DATA){
|
||||
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
|
||||
} else if (a.type == REGION_DATA){
|
||||
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
|
||||
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == DETECTION_DATA){
|
||||
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
|
||||
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == SWAG_DATA){
|
||||
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
|
||||
} else if (a.type == COMPARE_DATA){
|
||||
@ -698,7 +699,7 @@ void *load_thread(void *ptr)
|
||||
*(a.im) = load_image_color(a.path, 0, 0);
|
||||
*(a.resized) = resize_image(*(a.im), a.w, a.h);
|
||||
} else if (a.type == TAG_DATA){
|
||||
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
|
||||
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
||||
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||
}
|
||||
free(ptr);
|
||||
@ -740,13 +741,13 @@ data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
|
||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
||||
{
|
||||
data d = {0};
|
||||
d.indexes = calloc(n, sizeof(int));
|
||||
if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
||||
d.y = load_labels_paths(paths, n, labels, k);
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
@ -782,25 +783,25 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
||||
{
|
||||
if(m) paths = get_random_paths(paths, n, m);
|
||||
data d = {0};
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
||||
d.y = load_labels_paths(paths, n, labels, k);
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation)
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
||||
{
|
||||
if(m) paths = get_random_paths(paths, n, m);
|
||||
data d = {0};
|
||||
d.w = size;
|
||||
d.h = size;
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
||||
d.y = load_tags_paths(paths, n, k);
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
|
10
src/data.h
10
src/data.h
@ -54,6 +54,7 @@ typedef struct load_args{
|
||||
float angle;
|
||||
float saturation;
|
||||
float exposure;
|
||||
float hue;
|
||||
data *d;
|
||||
image *im;
|
||||
image *resized;
|
||||
@ -74,11 +75,12 @@ void print_letters(float *pred, int n);
|
||||
data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
|
||||
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
|
||||
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation);
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
|
||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
||||
data load_go(char *filename);
|
||||
|
||||
box_label *read_boxes(char *filename, int *n);
|
||||
|
10
src/demo.c
10
src/demo.c
@ -8,7 +8,7 @@
|
||||
#include "demo.h"
|
||||
#include <sys/time.h>
|
||||
|
||||
#define FRAMES 1
|
||||
#define FRAMES 3
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
@ -48,7 +48,7 @@ void *fetch_in_thread(void *ptr)
|
||||
|
||||
void *detect_in_thread(void *ptr)
|
||||
{
|
||||
float nms = .4;
|
||||
float nms = .1;
|
||||
|
||||
detection_layer l = net.layers[net.n-1];
|
||||
float *X = det_s.data;
|
||||
@ -153,6 +153,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
||||
if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
|
||||
if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
|
||||
|
||||
if(1){
|
||||
show_image(disp, "Demo");
|
||||
int c = cvWaitKey(1);
|
||||
if (c == 10){
|
||||
@ -161,6 +162,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
||||
else if(frame_skip == 60) frame_skip = 4;
|
||||
else frame_skip = 0;
|
||||
}
|
||||
}else{
|
||||
char buff[256];
|
||||
sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count);
|
||||
save_image(disp, buff);
|
||||
}
|
||||
|
||||
pthread_join(fetch_thread, 0);
|
||||
pthread_join(detect_thread, 0);
|
||||
|
@ -22,6 +22,8 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
|
||||
l.coords = coords;
|
||||
l.rescore = rescore;
|
||||
l.side = side;
|
||||
l.w = side;
|
||||
l.h = side;
|
||||
assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
|
||||
l.cost = calloc(1, sizeof(float));
|
||||
l.outputs = l.inputs;
|
||||
@ -44,6 +46,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
|
||||
int locations = l.side*l.side;
|
||||
int i,j;
|
||||
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
|
||||
//if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
|
||||
int b;
|
||||
if (l.softmax){
|
||||
for(b = 0; b < l.batch; ++b){
|
||||
@ -204,6 +207,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
|
||||
|
||||
|
||||
printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
|
||||
//if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -51,6 +51,11 @@ void train_detector(char *cfgfile, char *weightfile)
|
||||
args.d = &buffer;
|
||||
args.type = DETECTION_DATA;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
|
10
src/im2col.c
10
src/im2col.c
@ -18,13 +18,9 @@ void im2col_cpu(float* data_im,
|
||||
int ksize, int stride, int pad, float* data_col)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = (height - ksize) / stride + 1;
|
||||
int width_col = (width - ksize) / stride + 1;
|
||||
if (pad){
|
||||
height_col = 1 + (height-1) / stride;
|
||||
width_col = 1 + (width-1) / stride;
|
||||
pad = ksize/2;
|
||||
}
|
||||
int height_col = (height + 2*pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2*pad - ksize) / stride + 1;
|
||||
|
||||
int channels_col = channels * ksize * ksize;
|
||||
for (c = 0; c < channels_col; ++c) {
|
||||
int w_offset = c % ksize;
|
||||
|
@ -33,8 +33,12 @@ __global__ void im2col_gpu_kernel(const int n, const float* data_im,
|
||||
for (int j = 0; j < ksize; ++j) {
|
||||
int h = h_in + i;
|
||||
int w = w_in + j;
|
||||
|
||||
*data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ?
|
||||
data_im_ptr[i * width + j] : 0;
|
||||
|
||||
//*data_col_ptr = data_im_ptr[ii * width + jj];
|
||||
|
||||
data_col_ptr += height_col * width_col;
|
||||
}
|
||||
}
|
||||
@ -46,7 +50,6 @@ void im2col_ongpu(float *im,
|
||||
int ksize, int stride, int pad, float *data_col){
|
||||
// We are going to launch channels * height_col * width_col kernels, each
|
||||
// kernel responsible for copying a single-channel grid.
|
||||
pad = pad ? ksize/2 : 0;
|
||||
int height_col = (height + 2 * pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2 * pad - ksize) / stride + 1;
|
||||
int num_kernels = channels * height_col * width_col;
|
||||
@ -56,93 +59,3 @@ void im2col_ongpu(float *im,
|
||||
stride, height_col,
|
||||
width_col, data_col);
|
||||
}
|
||||
/*
|
||||
__global__ void im2col_pad_kernel(float *im,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, float *data_col)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = 1 + (height-1) / stride;
|
||||
int width_col = 1 + (width-1) / stride;
|
||||
int channels_col = channels * ksize * ksize;
|
||||
|
||||
int pad = ksize/2;
|
||||
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
int col_size = height_col*width_col*channels_col;
|
||||
if (id >= col_size) return;
|
||||
|
||||
int col_index = id;
|
||||
w = id % width_col;
|
||||
id /= width_col;
|
||||
h = id % height_col;
|
||||
id /= height_col;
|
||||
c = id % channels_col;
|
||||
id /= channels_col;
|
||||
|
||||
int w_offset = c % ksize;
|
||||
int h_offset = (c / ksize) % ksize;
|
||||
int im_channel = c / ksize / ksize;
|
||||
int im_row = h_offset + h * stride - pad;
|
||||
int im_col = w_offset + w * stride - pad;
|
||||
|
||||
int im_index = im_col + width*(im_row + height*im_channel);
|
||||
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
|
||||
|
||||
data_col[col_index] = val;
|
||||
}
|
||||
|
||||
__global__ void im2col_nopad_kernel(float *im,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, float *data_col)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = (height - ksize) / stride + 1;
|
||||
int width_col = (width - ksize) / stride + 1;
|
||||
int channels_col = channels * ksize * ksize;
|
||||
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
int col_size = height_col*width_col*channels_col;
|
||||
if (id >= col_size) return;
|
||||
|
||||
int col_index = id;
|
||||
w = id % width_col;
|
||||
id /= width_col;
|
||||
h = id % height_col;
|
||||
id /= height_col;
|
||||
c = id % channels_col;
|
||||
id /= channels_col;
|
||||
|
||||
int w_offset = c % ksize;
|
||||
int h_offset = (c / ksize) % ksize;
|
||||
int im_channel = c / ksize / ksize;
|
||||
int im_row = h_offset + h * stride;
|
||||
int im_col = w_offset + w * stride;
|
||||
|
||||
int im_index = im_col + width*(im_row + height*im_channel);
|
||||
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
|
||||
|
||||
data_col[col_index] = val;
|
||||
}
|
||||
|
||||
extern "C" void im2col_ongpu(float *im,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, int pad, float *data_col)
|
||||
{
|
||||
|
||||
int height_col = (height - ksize) / stride + 1;
|
||||
int width_col = (width - ksize) / stride + 1;
|
||||
int channels_col = channels * ksize * ksize;
|
||||
|
||||
if (pad){
|
||||
height_col = 1 + (height-1) / stride;
|
||||
width_col = 1 + (width-1) / stride;
|
||||
}
|
||||
|
||||
size_t n = channels_col*height_col*width_col;
|
||||
|
||||
if(pad)im2col_pad_kernel<<<cuda_gridsize(n),BLOCK>>>(im, channels, height, width, ksize, stride, data_col);
|
||||
else im2col_nopad_kernel<<<cuda_gridsize(n),BLOCK>>>(im, channels, height, width, ksize, stride, data_col);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
*/
|
||||
|
257
src/image.c
257
src/image.c
@ -1,6 +1,7 @@
|
||||
#include "image.h"
|
||||
#include "utils.h"
|
||||
#include "blas.h"
|
||||
#include "cuda.h"
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
@ -247,6 +248,26 @@ void constrain_image(image im)
|
||||
}
|
||||
|
||||
/*
 * Rescale the image data in place so that its smallest value maps to 0 and
 * its largest to 1. The range is measured over all h*w*c values together,
 * not per channel. When the range is (almost) zero the data is left
 * unchanged instead of dividing by a vanishing number.
 */
void normalize_image(image p)
{
    int i;
    int n = p.h*p.w*p.c;
    if(n <= 0) return;   /* nothing to scan; also keeps p.data[0] safe */

    /* Seed the running extremes from the data itself rather than from fixed
       sentinel constants, so values of any magnitude are handled. */
    float min = p.data[0];
    float max = p.data[0];
    for(i = 1; i < n; ++i){
        float v = p.data[i];
        if(v < min) min = v;
        if(v > max) max = v;
    }

    /* Flat image: fall back to the identity mapping (x - 0)/(1 - 0). */
    if(max - min < .000000001){
        min = 0;
        max = 1;
    }

    for(i = 0; i < n; ++i){
        p.data[i] = (p.data[i] - min)/(max-min);
    }
}
|
||||
|
||||
void normalize_image2(image p)
|
||||
{
|
||||
float *min = calloc(p.c, sizeof(float));
|
||||
float *max = calloc(p.c, sizeof(float));
|
||||
@ -320,7 +341,6 @@ void show_image_cv(image p, const char *name)
|
||||
}
|
||||
free_image(copy);
|
||||
if(0){
|
||||
//if(disp->height < 448 || disp->width < 448 || disp->height > 1000){
|
||||
int w = 448;
|
||||
int h = w*p.h/p.w;
|
||||
if(h > 1000){
|
||||
@ -334,35 +354,35 @@ void show_image_cv(image p, const char *name)
|
||||
}
|
||||
cvShowImage(buff, disp);
|
||||
cvReleaseImage(&disp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Display an image under the given name.
 *
 * When built with OPENCV the image is handed to show_image_cv(). Otherwise
 * a notice is printed to stderr and the image is passed to save_image()
 * with the same name.
 */
void show_image(image p, const char *name)
{
#ifdef OPENCV
    show_image_cv(p, name);
#else
    fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
    save_image(p, name);
#endif
}
|
||||
|
||||
#ifdef OPENCV
|
||||
/*
 * Fetch the next frame from an OpenCV capture and convert it to an image.
 *
 * Returns an empty image (all dimensions 0, no data buffer) when
 * cvQueryFrame() yields no frame.
 */
image get_image_from_stream(CvCapture *cap)
{
    IplImage* src = cvQueryFrame(cap);
    if (!src) return make_empty_image(0,0,0);
    image im = ipl_to_image(src);
    /* Channel reordering; presumably OpenCV's BGR to RGB (confirm in rgbgr_image). */
    rgbgr_image(im);
    return im;
}
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV
|
||||
void save_image_jpg(image p, const char *name)
|
||||
{
|
||||
void save_image_jpg(image p, const char *name)
|
||||
{
|
||||
image copy = copy_image(p);
|
||||
rgbgr_image(copy);
|
||||
if(p.c == 3) rgbgr_image(copy);
|
||||
int x,y,k;
|
||||
|
||||
char buff[256];
|
||||
@ -380,14 +400,14 @@ void show_image_cv(image p, const char *name)
|
||||
cvSaveImage(buff, disp,0);
|
||||
cvReleaseImage(&disp);
|
||||
free_image(copy);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void save_image(image im, const char *name)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
void save_image(image im, const char *name)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
save_image_jpg(im, name);
|
||||
#else
|
||||
#else
|
||||
char buff[256];
|
||||
//sprintf(buff, "%s (%d)", name, windows);
|
||||
sprintf(buff, "%s.png", name);
|
||||
@ -401,12 +421,12 @@ void show_image_cv(image p, const char *name)
|
||||
int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
|
||||
free(data);
|
||||
if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void show_image_layers(image p, char *name)
|
||||
{
|
||||
void show_image_layers(image p, char *name)
|
||||
{
|
||||
int i;
|
||||
char buff[256];
|
||||
for(i = 0; i < p.c; ++i){
|
||||
@ -415,34 +435,34 @@ void show_image_cv(image p, const char *name)
|
||||
show_image(layer, buff);
|
||||
free_image(layer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Show all layers of an image as one picture: build a temporary image with
 * collapse_image_layers(p, 1), display it under `name`, then free it.
 */
void show_image_collapsed(image p, char *name)
{
    image c = collapse_image_layers(p, 1);
    show_image(c, name);
    free_image(c);
}
|
||||
|
||||
image make_empty_image(int w, int h, int c)
|
||||
{
|
||||
image make_empty_image(int w, int h, int c)
|
||||
{
|
||||
image out;
|
||||
out.data = 0;
|
||||
out.h = h;
|
||||
out.w = w;
|
||||
out.c = c;
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
image make_image(int w, int h, int c)
|
||||
{
|
||||
image make_image(int w, int h, int c)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = calloc(h*w*c, sizeof(float));
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
image make_random_image(int w, int h, int c)
|
||||
{
|
||||
image make_random_image(int w, int h, int c)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = calloc(h*w*c, sizeof(float));
|
||||
int i;
|
||||
@ -450,17 +470,17 @@ void show_image_cv(image p, const char *name)
|
||||
out.data[i] = (rand_normal() * .25) + .5;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
image float_to_image(int w, int h, int c, float *data)
|
||||
{
|
||||
image float_to_image(int w, int h, int c, float *data)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = data;
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
|
||||
{
|
||||
image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
|
||||
{
|
||||
int x, y, c;
|
||||
float cx = im.w/2.;
|
||||
float cy = im.h/2.;
|
||||
@ -476,10 +496,10 @@ void show_image_cv(image p, const char *name)
|
||||
}
|
||||
}
|
||||
return rot;
|
||||
}
|
||||
}
|
||||
|
||||
image rotate_image(image im, float rad)
|
||||
{
|
||||
image rotate_image(image im, float rad)
|
||||
{
|
||||
int x, y, c;
|
||||
float cx = im.w/2.;
|
||||
float cy = im.h/2.;
|
||||
@ -495,22 +515,22 @@ void show_image_cv(image p, const char *name)
|
||||
}
|
||||
}
|
||||
return rot;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Add the constant s to every value of the image, in place
 * (all h*w*c entries of m.data).
 */
void translate_image(image m, float s)
{
    int i;
    int n = m.h*m.w*m.c;
    for(i = 0; i < n; ++i) m.data[i] += s;
}
|
||||
|
||||
/*
 * Multiply every value of the image by the constant s, in place
 * (all h*w*c entries of m.data).
 */
void scale_image(image m, float s)
{
    int i;
    int n = m.h*m.w*m.c;
    for(i = 0; i < n; ++i) m.data[i] *= s;
}
|
||||
|
||||
image crop_image(image im, int dx, int dy, int w, int h)
|
||||
{
|
||||
image crop_image(image im, int dx, int dy, int w, int h)
|
||||
{
|
||||
image cropped = make_image(w, h, im.c);
|
||||
int i, j, k;
|
||||
for(k = 0; k < im.c; ++k){
|
||||
@ -529,7 +549,7 @@ void show_image_cv(image p, const char *name)
|
||||
}
|
||||
}
|
||||
return cropped;
|
||||
}
|
||||
}
|
||||
|
||||
int best_3d_shift_r(image a, image b, int min, int max)
|
||||
{
|
||||
@ -666,7 +686,7 @@ void rgb_to_hsv(image im)
|
||||
v = max;
|
||||
if(max == 0){
|
||||
s = 0;
|
||||
h = -1;
|
||||
h = 0;
|
||||
}else{
|
||||
s = delta/max;
|
||||
if(r == max){
|
||||
@ -677,6 +697,7 @@ void rgb_to_hsv(image im)
|
||||
h = 4 + (r - g) / delta;
|
||||
}
|
||||
if (h < 0) h += 6;
|
||||
h = h/6.;
|
||||
}
|
||||
set_pixel(im, i, j, 0, h);
|
||||
set_pixel(im, i, j, 1, s);
|
||||
@ -694,7 +715,7 @@ void hsv_to_rgb(image im)
|
||||
float f, p, q, t;
|
||||
for(j = 0; j < im.h; ++j){
|
||||
for(i = 0; i < im.w; ++i){
|
||||
h = get_pixel(im, i , j, 0);
|
||||
h = 6 * get_pixel(im, i , j, 0);
|
||||
s = get_pixel(im, i , j, 1);
|
||||
v = get_pixel(im, i , j, 2);
|
||||
if (s == 0) {
|
||||
@ -781,6 +802,18 @@ void scale_image_channel(image im, int c, float v)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Add the offset v to every pixel of channel c, in place.
 * Pixels are read and written through get_pixel()/set_pixel(), row by row.
 */
void translate_image_channel(image im, int c, float v)
{
    int row, col;
    for(row = 0; row < im.h; ++row){
        for(col = 0; col < im.w; ++col){
            float shifted = get_pixel(im, col, row, c) + v;
            set_pixel(im, col, row, c, shifted);
        }
    }
}
|
||||
|
||||
image binarize_image(image im)
|
||||
{
|
||||
image c = copy_image(im);
|
||||
@ -800,6 +833,19 @@ void saturate_image(image im, float sat)
|
||||
constrain_image(im);
|
||||
}
|
||||
|
||||
/*
 * Shift the hue of an image in place.
 *
 * The image is converted with rgb_to_hsv(), `hue` is added to the first
 * w*h values of the data buffer (presumably the hue plane; confirm against
 * the image layout), each result is wrapped by at most one unit back
 * towards [0,1], and the image is converted back and clamped with
 * constrain_image().
 */
void hue_image(image im, float hue)
{
    rgb_to_hsv(im);

    float *plane = im.data;
    int count = im.w*im.h;
    int k;
    for(k = 0; k < count; ++k){
        float shifted = plane[k] + hue;
        if (shifted > 1) shifted -= 1;
        if (shifted < 0) shifted += 1;
        plane[k] = shifted;
    }

    hsv_to_rgb(im);
    constrain_image(im);
}
|
||||
|
||||
void exposure_image(image im, float sat)
|
||||
{
|
||||
rgb_to_hsv(im);
|
||||
@ -808,6 +854,29 @@ void exposure_image(image im, float sat)
|
||||
constrain_image(im);
|
||||
}
|
||||
|
||||
/*
 * Apply a colour distortion to an image in place, working in HSV space.
 *
 * hue: amount added to the first w*h values of the buffer (presumably the
 *      hue plane; confirm against the image layout), wrapped by at most
 *      one unit back towards [0,1].
 * sat: multiplier applied to channel 1.
 * val: multiplier applied to channel 2.
 *
 * The image is converted back with hsv_to_rgb() and clamped with
 * constrain_image() afterwards.
 */
void distort_image(image im, float hue, float sat, float val)
{
    rgb_to_hsv(im);
    scale_image_channel(im, 1, sat);
    scale_image_channel(im, 2, val);

    float *plane = im.data;
    int count = im.w*im.h;
    int k;
    for(k = 0; k < count; ++k){
        float shifted = plane[k] + hue;
        if (shifted > 1) shifted -= 1;
        if (shifted < 0) shifted += 1;
        plane[k] = shifted;
    }

    hsv_to_rgb(im);
    constrain_image(im);
}
|
||||
|
||||
/*
 * Randomly distort the colours of an image in place.
 *
 * A hue shift is drawn with rand_uniform(-hue, hue), and the saturation and
 * exposure factors are drawn with rand_scale(); all three are then applied
 * by distort_image(). The draws happen in the order hue, saturation,
 * exposure, so the random stream is consumed in a fixed sequence.
 */
void random_distort_image(image im, float hue, float saturation, float exposure)
{
    const float hue_shift = rand_uniform(-hue, hue);
    const float sat_factor = rand_scale(saturation);
    const float exp_factor = rand_scale(exposure);

    distort_image(im, hue_shift, sat_factor, exp_factor);
}
|
||||
|
||||
void saturate_exposure_image(image im, float sat, float exposure)
|
||||
{
|
||||
rgb_to_hsv(im);
|
||||
@ -876,7 +945,6 @@ image resize_image(image im, int w, int h)
|
||||
return resized;
|
||||
}
|
||||
|
||||
#include "cuda.h"
|
||||
|
||||
void test_resize(char *filename)
|
||||
{
|
||||
@ -885,59 +953,40 @@ void test_resize(char *filename)
|
||||
printf("L2 Norm: %f\n", mag);
|
||||
image gray = grayscale_image(im);
|
||||
|
||||
image sat2 = copy_image(im);
|
||||
saturate_image(sat2, 2);
|
||||
image c1 = copy_image(im);
|
||||
image c2 = copy_image(im);
|
||||
image c3 = copy_image(im);
|
||||
image c4 = copy_image(im);
|
||||
distort_image(c1, .1, 1.5, 1.5);
|
||||
distort_image(c2, -.1, .66666, .66666);
|
||||
distort_image(c3, .1, 1.5, .66666);
|
||||
distort_image(c4, .1, .66666, 1.5);
|
||||
|
||||
image sat5 = copy_image(im);
|
||||
saturate_image(sat5, .5);
|
||||
|
||||
image exp2 = copy_image(im);
|
||||
exposure_image(exp2, 2);
|
||||
|
||||
image exp5 = copy_image(im);
|
||||
exposure_image(exp5, .5);
|
||||
|
||||
image bin = binarize_image(im);
|
||||
|
||||
/*
|
||||
#ifdef GPU
|
||||
image r = resize_image(im, im.w, im.h);
|
||||
image black = make_image(im.w*2 + 3, im.h*2 + 3, 9);
|
||||
image black2 = make_image(im.w, im.h, 3);
|
||||
|
||||
float *r_gpu = cuda_make_array(r.data, r.w*r.h*r.c);
|
||||
float *black_gpu = cuda_make_array(black.data, black.w*black.h*black.c);
|
||||
float *black2_gpu = cuda_make_array(black2.data, black2.w*black2.h*black2.c);
|
||||
shortcut_gpu(3, r.w, r.h, 1, r_gpu, black.w, black.h, 3, black_gpu);
|
||||
//flip_image(r);
|
||||
//shortcut_gpu(3, r.w, r.h, 1, r.data, black.w, black.h, 3, black.data);
|
||||
|
||||
shortcut_gpu(3, black.w, black.h, 3, black_gpu, black2.w, black2.h, 1, black2_gpu);
|
||||
cuda_pull_array(black_gpu, black.data, black.w*black.h*black.c);
|
||||
cuda_pull_array(black2_gpu, black2.data, black2.w*black2.h*black2.c);
|
||||
show_image_layers(black, "Black");
|
||||
show_image(black2, "Recreate");
|
||||
#endif
|
||||
*/
|
||||
image rot = rotate_crop_image(im, -.2618, 1, im.w/2, im.h/2, 0, 0);
|
||||
image rot3 = rotate_crop_image(im, -.2618, 2, im.w, im.h, im.w/2, 0);
|
||||
image rot2 = rotate_crop_image(im, -.2618, 1, im.w, im.h, 0, 0);
|
||||
show_image(rot, "Rotated");
|
||||
show_image(rot2, "base");
|
||||
|
||||
show_image(rot3, "Rotated2");
|
||||
|
||||
/*
|
||||
show_image(im, "Original");
|
||||
show_image(bin, "Binary");
|
||||
show_image(gray, "Gray");
|
||||
show_image(sat2, "Saturation-2");
|
||||
show_image(sat5, "Saturation-.5");
|
||||
show_image(exp2, "Exposure-2");
|
||||
show_image(exp5, "Exposure-.5");
|
||||
*/
|
||||
show_image(c1, "C1");
|
||||
show_image(c2, "C2");
|
||||
show_image(c3, "C3");
|
||||
show_image(c4, "C4");
|
||||
#ifdef OPENCV
|
||||
while(1){
|
||||
float exposure = 1.15;
|
||||
float saturation = 1.15;
|
||||
float hue = .05;
|
||||
|
||||
image c = copy_image(im);
|
||||
|
||||
float dexp = rand_scale(exposure);
|
||||
float dsat = rand_scale(saturation);
|
||||
float dhue = rand_uniform(-hue, hue);
|
||||
|
||||
distort_image(c, dhue, dsat, dexp);
|
||||
show_image(c, "rand");
|
||||
printf("%f %f %f\n", dhue, dsat, dexp);
|
||||
free_image(c);
|
||||
cvWaitKey(0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1180,10 +1229,8 @@ void show_images(image *ims, int n, char *window)
|
||||
image sized = resize_image(m, w, h);
|
||||
*/
|
||||
normalize_image(m);
|
||||
image sized = resize_image(m, m.w, m.h);
|
||||
save_image(sized, window);
|
||||
show_image(sized, window);
|
||||
free_image(sized);
|
||||
save_image(m, window);
|
||||
show_image(m, window);
|
||||
free_image(m);
|
||||
}
|
||||
|
||||
|
@ -32,6 +32,7 @@ void scale_image(image m, float s);
|
||||
image crop_image(image im, int dx, int dy, int w, int h);
|
||||
image random_crop_image(image im, int w, int h);
|
||||
image random_augment_image(image im, float angle, int low, int high, int size);
|
||||
void random_distort_image(image im, float hue, float saturation, float exposure);
|
||||
image resize_image(image im, int w, int h);
|
||||
image resize_min(image im, int min);
|
||||
void translate_image(image m, float s);
|
||||
@ -41,6 +42,7 @@ void rotate_image_cw(image im, int times);
|
||||
void embed_image(image source, image dest, int dx, int dy);
|
||||
void saturate_image(image im, float sat);
|
||||
void exposure_image(image im, float sat);
|
||||
void distort_image(image im, float hue, float sat, float val);
|
||||
void saturate_exposure_image(image im, float sat, float exposure);
|
||||
void hsv_to_rgb(image im);
|
||||
void rgbgr_image(image im);
|
||||
|
237
src/imagenet.c
237
src/imagenet.c
@ -1,237 +0,0 @@
|
||||
#include "network.h"
|
||||
#include "utils.h"
|
||||
#include "parser.h"
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#endif
|
||||
|
||||
void train_imagenet(char *cfgfile, char *weightfile)
|
||||
{
|
||||
data_seed = time(0);
|
||||
srand(time(0));
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
printf("%s\n", base);
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = 1024;
|
||||
char **labels = get_labels("data/inet.labels.list");
|
||||
list *plist = get_paths("data/inet.train.list");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
int N = plist->size;
|
||||
clock_t time;
|
||||
pthread_t load_thread;
|
||||
data train;
|
||||
data buffer;
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.paths = paths;
|
||||
args.classes = 1000;
|
||||
args.n = imgs;
|
||||
args.m = N;
|
||||
args.labels = labels;
|
||||
args.d = &buffer;
|
||||
args.type = OLD_CLASSIFICATION_DATA;
|
||||
|
||||
load_thread = load_data_in_thread(args);
|
||||
int epoch = (*net.seen)/N;
|
||||
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
||||
time=clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
|
||||
load_thread = load_data_in_thread(args);
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
float loss = train_network(net, train);
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
||||
free_data(train);
|
||||
if(*net.seen/N > epoch){
|
||||
epoch = *net.seen/N;
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(*net.seen%1000 == 0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup",backup_directory,base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
}
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.weights", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
free_data(buffer);
|
||||
free_network(net);
|
||||
free_ptrs((void**)labels, 1000);
|
||||
free_ptrs((void**)paths, plist->size);
|
||||
free_list(plist);
|
||||
free(base);
|
||||
}
|
||||
|
||||
void validate_imagenet(char *filename, char *weightfile)
|
||||
{
|
||||
int i = 0;
|
||||
network net = parse_network_cfg(filename);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
srand(time(0));
|
||||
|
||||
char **labels = get_labels("data/inet.labels.list");
|
||||
//list *plist = get_paths("data/inet.suppress.list");
|
||||
list *plist = get_paths("data/inet.val.list");
|
||||
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
int m = plist->size;
|
||||
free_list(plist);
|
||||
|
||||
clock_t time;
|
||||
float avg_acc = 0;
|
||||
float avg_top5 = 0;
|
||||
int splits = 50;
|
||||
int num = (i+1)*m/splits - i*m/splits;
|
||||
|
||||
data val, buffer;
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.paths = paths;
|
||||
args.classes = 1000;
|
||||
args.n = num;
|
||||
args.m = 0;
|
||||
args.labels = labels;
|
||||
args.d = &buffer;
|
||||
args.type = OLD_CLASSIFICATION_DATA;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
for(i = 1; i <= splits; ++i){
|
||||
time=clock();
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
val = buffer;
|
||||
|
||||
num = (i+1)*m/splits - i*m/splits;
|
||||
char **part = paths+(i*m/splits);
|
||||
if(i != splits){
|
||||
args.paths = part;
|
||||
load_thread = load_data_in_thread(args);
|
||||
}
|
||||
printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time));
|
||||
|
||||
time=clock();
|
||||
float *acc = network_accuracies(net, val, 5);
|
||||
avg_acc += acc[0];
|
||||
avg_top5 += acc[1];
|
||||
printf("%d: top1: %f, top5: %f, %lf seconds, %d images\n", i, avg_acc/i, avg_top5/i, sec(clock()-time), val.X.rows);
|
||||
free_data(val);
|
||||
}
|
||||
}
|
||||
|
||||
void test_imagenet(char *cfgfile, char *weightfile, char *filename)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
int i = 0;
|
||||
char **names = get_labels("data/shortnames.txt");
|
||||
clock_t time;
|
||||
int indexes[10];
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input, 256, 256);
|
||||
float *X = im.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
top_predictions(net, 10, indexes);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
for(i = 0; i < 10; ++i){
|
||||
int index = indexes[i];
|
||||
printf("%s: %f\n", names[index], predictions[index]);
|
||||
}
|
||||
free_image(im);
|
||||
if (filename) break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Command-line entry point for the imagenet sub-commands.
 *
 * argv[2] selects the action ("test", "train" or "valid"), argv[3] is the
 * cfg file, argv[4] an optional weights file and argv[5] an optional image
 * filename (only passed to "test").  With fewer than four arguments a usage
 * line is printed to stderr.  Any other action string does nothing.
 */
void run_imagenet(int argc, char **argv)
{
    char *action;
    char *cfg;
    char *weights = 0;
    char *filename = 0;

    if(argc <= 3){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    action = argv[2];
    cfg = argv[3];
    if(argc >= 5) weights = argv[4];
    if(argc >= 6) filename = argv[5];

    if(strcmp(action, "test") == 0){
        test_imagenet(cfg, weights, filename);
        return;
    }
    if(strcmp(action, "train") == 0){
        train_imagenet(cfg, weights);
        return;
    }
    if(strcmp(action, "valid") == 0){
        validate_imagenet(cfg, weights);
    }
}
|
||||
|
||||
/*
|
||||
void train_imagenet_distributed(char *address)
|
||||
{
|
||||
float avg_loss = 1;
|
||||
srand(time(0));
|
||||
network net = parse_network_cfg("cfg/net.cfg");
|
||||
set_learning_network(&net, 0, 1, 0);
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = net.batch;
|
||||
int i = 0;
|
||||
char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list");
|
||||
list *plist = get_paths("/data/imagenet/cls.train.list");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
clock_t time;
|
||||
data train, buffer;
|
||||
pthread_t load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer);
|
||||
while(1){
|
||||
i += 1;
|
||||
|
||||
time=clock();
|
||||
client_update(net, address);
|
||||
printf("Updated: %lf seconds\n", sec(clock()-time));
|
||||
|
||||
time=clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
normalize_data_rows(train);
|
||||
load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer);
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
|
||||
float loss = train_network(net, train);
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs);
|
||||
free_data(train);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
@ -72,6 +72,7 @@ struct layer{
|
||||
float saturation;
|
||||
float exposure;
|
||||
float shift;
|
||||
float ratio;
|
||||
int softmax;
|
||||
int classes;
|
||||
int coords;
|
||||
@ -82,6 +83,7 @@ struct layer{
|
||||
int joint;
|
||||
int noadjust;
|
||||
int reorg;
|
||||
int log;
|
||||
|
||||
float alpha;
|
||||
float beta;
|
||||
|
@ -18,7 +18,7 @@ image get_maxpool_delta(maxpool_layer l)
|
||||
return float_to_image(w,h,c,l.delta);
|
||||
}
|
||||
|
||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride)
|
||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
|
||||
{
|
||||
fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d size, %d stride\n", h,w,c,size,stride);
|
||||
maxpool_layer l = {0};
|
||||
@ -27,8 +27,9 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.out_w = (w-1)/stride + 1;
|
||||
l.out_h = (h-1)/stride + 1;
|
||||
l.pad = padding;
|
||||
l.out_w = (w + 2*padding - size + 1)/stride + 1;
|
||||
l.out_h = (h + 2*padding - size + 1)/stride + 1;
|
||||
l.out_c = c;
|
||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||
l.inputs = h*w*c;
|
||||
@ -48,13 +49,12 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
|
||||
|
||||
void resize_maxpool_layer(maxpool_layer *l, int w, int h)
|
||||
{
|
||||
int stride = l->stride;
|
||||
l->h = h;
|
||||
l->w = w;
|
||||
l->inputs = h*w*l->c;
|
||||
|
||||
l->out_w = (w-1)/stride + 1;
|
||||
l->out_h = (h-1)/stride + 1;
|
||||
l->out_w = (w + 2*l->pad - l->size + 1)/l->stride + 1;
|
||||
l->out_h = (h + 2*l->pad - l->size + 1)/l->stride + 1;
|
||||
l->outputs = l->out_w * l->out_h * l->c;
|
||||
int output_size = l->outputs * l->batch;
|
||||
|
||||
@ -75,11 +75,11 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
|
||||
void forward_maxpool_layer(const maxpool_layer l, network_state state)
|
||||
{
|
||||
int b,i,j,k,m,n;
|
||||
int w_offset = (-l.size-1)/2 + 1;
|
||||
int h_offset = (-l.size-1)/2 + 1;
|
||||
int w_offset = -l.pad;
|
||||
int h_offset = -l.pad;
|
||||
|
||||
int h = (l.h-1)/l.stride + 1;
|
||||
int w = (l.w-1)/l.stride + 1;
|
||||
int h = l.out_h;
|
||||
int w = l.out_w;
|
||||
int c = l.c;
|
||||
|
||||
for(b = 0; b < l.batch; ++b){
|
||||
@ -112,8 +112,8 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state)
|
||||
void backward_maxpool_layer(const maxpool_layer l, network_state state)
|
||||
{
|
||||
int i;
|
||||
int h = (l.h-1)/l.stride + 1;
|
||||
int w = (l.w-1)/l.stride + 1;
|
||||
int h = l.out_h;
|
||||
int w = l.out_w;
|
||||
int c = l.c;
|
||||
for(i = 0; i < h*w*c*l.batch; ++i){
|
||||
int index = l.indexes[i];
|
||||
|
@ -9,7 +9,7 @@
|
||||
typedef layer maxpool_layer;
|
||||
|
||||
image get_maxpool_image(maxpool_layer l);
|
||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride);
|
||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
|
||||
void resize_maxpool_layer(maxpool_layer *l, int w, int h);
|
||||
void forward_maxpool_layer(const maxpool_layer l, network_state state);
|
||||
void backward_maxpool_layer(const maxpool_layer l, network_state state);
|
||||
|
@ -7,10 +7,10 @@ extern "C" {
|
||||
#include "cuda.h"
|
||||
}
|
||||
|
||||
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *input, float *output, int *indexes)
|
||||
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
|
||||
{
|
||||
int h = (in_h-1)/stride + 1;
|
||||
int w = (in_w-1)/stride + 1;
|
||||
int h = (in_h + 2*pad - size + 1)/stride + 1;
|
||||
int w = (in_w + 2*pad - size + 1)/stride + 1;
|
||||
int c = in_c;
|
||||
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
@ -24,8 +24,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
|
||||
id /= c;
|
||||
int b = id;
|
||||
|
||||
int w_offset = (-size-1)/2 + 1;
|
||||
int h_offset = (-size-1)/2 + 1;
|
||||
int w_offset = -pad;
|
||||
int h_offset = -pad;
|
||||
|
||||
int out_index = j + w*(i + h*(k + c*b));
|
||||
float max = -INFINITY;
|
||||
@ -47,10 +47,10 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
|
||||
indexes[out_index] = max_i;
|
||||
}
|
||||
|
||||
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *delta, float *prev_delta, int *indexes)
|
||||
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
|
||||
{
|
||||
int h = (in_h-1)/stride + 1;
|
||||
int w = (in_w-1)/stride + 1;
|
||||
int h = (in_h + 2*pad - size + 1)/stride + 1;
|
||||
int w = (in_w + 2*pad - size + 1)/stride + 1;
|
||||
int c = in_c;
|
||||
int area = (size-1)/stride;
|
||||
|
||||
@ -66,8 +66,8 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
|
||||
id /= in_c;
|
||||
int b = id;
|
||||
|
||||
int w_offset = (-size-1)/2 + 1;
|
||||
int h_offset = (-size-1)/2 + 1;
|
||||
int w_offset = -pad;
|
||||
int h_offset = -pad;
|
||||
|
||||
float d = 0;
|
||||
int l, m;
|
||||
@ -86,13 +86,13 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
|
||||
|
||||
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
||||
{
|
||||
int h = (layer.h-1)/layer.stride + 1;
|
||||
int w = (layer.w-1)/layer.stride + 1;
|
||||
int h = layer.out_h;
|
||||
int w = layer.out_w;
|
||||
int c = layer.c;
|
||||
|
||||
size_t n = h*w*c*layer.batch;
|
||||
|
||||
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu);
|
||||
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
@ -100,7 +100,7 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st
|
||||
{
|
||||
size_t n = layer.h*layer.w*layer.c*layer.batch;
|
||||
|
||||
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu);
|
||||
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
|
@ -420,7 +420,7 @@ int resize_network(network *net, int w, int h)
|
||||
net->h = h;
|
||||
int inputs = 0;
|
||||
size_t workspace_size = 0;
|
||||
//fprintf(stderr, "Resizing to %d x %d...", w, h);
|
||||
//fprintf(stderr, "Resizing to %d x %d...\n", w, h);
|
||||
//fflush(stderr);
|
||||
for (i = 0; i < net->n; ++i){
|
||||
layer l = net->layers[i];
|
||||
|
@ -43,6 +43,7 @@ typedef struct network{
|
||||
float angle;
|
||||
float exposure;
|
||||
float saturation;
|
||||
float hue;
|
||||
|
||||
int gpu_index;
|
||||
|
||||
|
21
src/parser.c
21
src/parser.c
@ -2,6 +2,7 @@
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "blas.h"
|
||||
#include "parser.h"
|
||||
#include "assert.h"
|
||||
#include "activations.h"
|
||||
@ -147,7 +148,10 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
||||
int n = option_find_int(options, "filters",1);
|
||||
int size = option_find_int(options, "size",1);
|
||||
int stride = option_find_int(options, "stride",1);
|
||||
int pad = option_find_int(options, "pad",0);
|
||||
int pad = option_find_int_quiet(options, "pad",0);
|
||||
int padding = option_find_int_quiet(options, "padding",0);
|
||||
if(pad) padding = size/2;
|
||||
|
||||
char *activation_s = option_find_str(options, "activation", "logistic");
|
||||
ACTIVATION activation = get_activation(activation_s);
|
||||
|
||||
@ -161,7 +165,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
||||
int binary = option_find_int_quiet(options, "binary", 0);
|
||||
int xnor = option_find_int_quiet(options, "xnor", 0);
|
||||
|
||||
convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation, batch_normalize, binary, xnor);
|
||||
convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor);
|
||||
layer.flipped = option_find_int_quiet(options, "flipped", 0);
|
||||
layer.dot = option_find_float_quiet(options, "dot", 0);
|
||||
|
||||
@ -234,9 +238,16 @@ layer parse_region(list *options, size_params params)
|
||||
int coords = option_find_int(options, "coords", 4);
|
||||
int classes = option_find_int(options, "classes", 20);
|
||||
int num = option_find_int(options, "num", 1);
|
||||
|
||||
params.w = option_find_int(options, "side", params.w);
|
||||
params.h = option_find_int(options, "side", params.h);
|
||||
|
||||
layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords);
|
||||
assert(l.outputs == params.inputs);
|
||||
|
||||
l.log = option_find_int_quiet(options, "log", 0);
|
||||
l.sqrt = option_find_int_quiet(options, "sqrt", 0);
|
||||
|
||||
l.softmax = option_find_int(options, "softmax", 0);
|
||||
l.max_boxes = option_find_int_quiet(options, "max",30);
|
||||
l.jitter = option_find_float(options, "jitter", .2);
|
||||
@ -278,6 +289,7 @@ cost_layer parse_cost(list *options, size_params params)
|
||||
COST_TYPE type = get_cost_type(type_s);
|
||||
float scale = option_find_float_quiet(options, "scale",1);
|
||||
cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
|
||||
layer.ratio = option_find_float_quiet(options, "ratio",0);
|
||||
return layer;
|
||||
}
|
||||
|
||||
@ -324,6 +336,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
|
||||
{
|
||||
int stride = option_find_int(options, "stride",1);
|
||||
int size = option_find_int(options, "size",stride);
|
||||
int padding = option_find_int_quiet(options, "padding", (size-1)/2);
|
||||
|
||||
int batch,h,w,c;
|
||||
h = params.h;
|
||||
@ -332,7 +345,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
|
||||
batch=params.batch;
|
||||
if(!(h && w && c)) error("Layer before maxpool layer must output image.");
|
||||
|
||||
maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride);
|
||||
maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
|
||||
return layer;
|
||||
}
|
||||
|
||||
@ -486,6 +499,7 @@ void parse_net_options(list *options, network *net)
|
||||
net->angle = option_find_float_quiet(options, "angle", 0);
|
||||
net->saturation = option_find_float_quiet(options, "saturation", 1);
|
||||
net->exposure = option_find_float_quiet(options, "exposure", 1);
|
||||
net->hue = option_find_float_quiet(options, "hue", 0);
|
||||
|
||||
if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
|
||||
|
||||
@ -1085,6 +1099,7 @@ void load_convolutional_weights(layer l, FILE *fp)
|
||||
fread(l.rolling_variance, sizeof(float), l.n, fp);
|
||||
}
|
||||
fread(l.filters, sizeof(float), num, fp);
|
||||
//if(l.c == 3) scal_cpu(num, 1./256, l.filters, 1);
|
||||
if (l.flipped) {
|
||||
transpose_matrix(l.filters, l.c*l.size*l.size, l.n);
|
||||
}
|
||||
|
@ -80,8 +80,8 @@ box get_region_box(float *x, int index, int i, int j, int w, int h, int adjust,
|
||||
b.w = logistic_activate(x[index + 2]);
|
||||
b.h = logistic_activate(x[index + 3]);
|
||||
}
|
||||
if(adjust && b.w < .01) b.w = .01;
|
||||
if(adjust && b.h < .01) b.h = .01;
|
||||
//if(adjust && b.w < .01) b.w = .01;
|
||||
//if(adjust && b.h < .01) b.h = .01;
|
||||
return b;
|
||||
}
|
||||
|
||||
@ -149,6 +149,7 @@ void forward_region_layer(const region_layer l, network_state state)
|
||||
l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
|
||||
if(best_iou > .5) l.delta[index + 4] = 0;
|
||||
|
||||
/*
|
||||
if(*(state.net.seen) < 6400){
|
||||
box truth = {0};
|
||||
truth.x = (i + .5)/l.w;
|
||||
@ -157,6 +158,7 @@ void forward_region_layer(const region_layer l, network_state state)
|
||||
truth.h = .5;
|
||||
delta_region_box(truth, l.output, index, i, j, l.w, l.h, l.delta, LOG, 1);
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -45,6 +45,11 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
|
||||
args.d = &buffer;
|
||||
args.type = TAG_DATA;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
fprintf(stderr, "%d classes\n", net.outputs);
|
||||
|
||||
load_thread = load_data_in_thread(args);
|
||||
|
@ -585,6 +585,13 @@ float rand_uniform(float min, float max)
|
||||
return ((float)rand()/RAND_MAX * (max - min)) + min;
|
||||
}
|
||||
|
||||
/*
 * Return a random scale factor derived from `s`.
 *
 * A value is drawn with rand_uniform(1, s); depending on the parity of a
 * following rand() call, either that value or its reciprocal is returned.
 */
float rand_scale(float s)
{
    float factor = rand_uniform(1, s);
    int invert = (rand()%2 == 0);
    if(invert){
        return 1./factor;
    }
    return factor;
}
|
||||
|
||||
float **one_hot_encode(float *a, int n, int k)
|
||||
{
|
||||
int i;
|
||||
|
@ -42,6 +42,7 @@ float mse_array(float *a, int n);
|
||||
float rand_normal();
|
||||
size_t rand_size_t();
|
||||
float rand_uniform(float min, float max);
|
||||
float rand_scale(float s);
|
||||
int rand_int(int min, int max);
|
||||
float sum_array(float *a, int n);
|
||||
float mean_array(float *a, int n);
|
||||
|
@ -9,9 +9,9 @@
|
||||
|
||||
void extract_voxel(char *lfile, char *rfile, char *prefix)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
int w = 1920;
|
||||
int h = 1080;
|
||||
#ifdef OPENCV
|
||||
int shift = 0;
|
||||
int count = 0;
|
||||
CvCapture *lcap = cvCaptureFromFile(lfile);
|
||||
@ -39,7 +39,7 @@ void extract_voxel(char *lfile, char *rfile, char *prefix)
|
||||
}
|
||||
|
||||
#else
|
||||
printf("need OpenCV for extraction\n");
|
||||
printf("need OpenCV for extraction\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -54,6 +54,11 @@ void train_yolo(char *cfgfile, char *weightfile)
|
||||
args.d = &buffer;
|
||||
args.type = REGION_DATA;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
|
Loading…
Reference in New Issue
Block a user